diff --git a/Project.toml b/Project.toml index 45918b7..f1d3579 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ScientificTypes" uuid = "321657f4-b219-11e9-178b-2701a2544e81" authors = ["Anthony D. Blaom "] -version = "2.3.1" +version = "2.3.2" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -9,7 +9,6 @@ ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -PersistenceDiagramsBase = "b1ad91c1-539c-4ace-90bd-ea06abc420fa" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161" @@ -19,9 +18,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] CategoricalArrays = "0.8, 0.9, 0.10" ColorTypes = "0.9, 0.10, 0.11" -CorpusLoaders = "0.3.2" Distributions = "0.25.1" -PersistenceDiagramsBase = "0.1" PrettyTables = "1" Reexport = "1.2" ScientificTypesBase = "2.2" diff --git a/src/ScientificTypes.jl b/src/ScientificTypes.jl index 052f0fe..9c0e730 100644 --- a/src/ScientificTypes.jl +++ b/src/ScientificTypes.jl @@ -6,7 +6,6 @@ using Reexport using Tables using CategoricalArrays using ColorTypes -using PersistenceDiagramsBase using CorpusLoaders using PrettyTables using Dates diff --git a/src/convention/scitype.jl b/src/convention/scitype.jl index 8fc218a..401ddc3 100644 --- a/src/convention/scitype.jl +++ b/src/convention/scitype.jl @@ -16,7 +16,7 @@ ColorImage{size(img)...} # Persistence diagrams -ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram +# ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram # CategoricalArray scitype @@ -66,34 +66,6 @@ ST.scitype(::Distributions.Sampleable{F,S}) where {F,S} = ST.scitype(::Distributions.Distribution{F,S}) where {F,S} = Density{space_scitype(F,S)} -# Text analysis - EXPERIMENTAL - -# This would be less of a hack if some of #155 were adopted. - -type2scitype(T::Type) = ST.Scitype(T, DefaultConvention()) -type2scitype(::Type{<:AbstractVector{T}}) where T = - AbstractVector{type2scitype(T)} -type2scitype(::NTuple{N,T}) where {N,T} = NTuple{type2scitype{T}} -const PlainNGram{N} = NTuple{N,<:AbstractString} -const TaggedNGram{N} = NTuple{N,<:CorpusLoaders.TaggedWord} -ST.scitype(::TaggedWord, ::DefaultConvention) = Annotated{Textual} -ST.scitype(::Document{<:AbstractVector{T}}, ::DefaultConvention) where T = - Annotated{AbstractVector{type2scitype(T)}} -ST.scitype(::AbstractDict{<:AbstractString,<:Integer}, - ::DefaultConvention) = Multiset{Textual} -ST.scitype(::AbstractDict{<:TaggedWord,<:Integer}, - ::DefaultConvention) = Multiset{Annotated{Textual}} -ST.scitype(::AbstractDict{<:Union{TaggedWord,AbstractString},<:Integer}, - ::DefaultConvention) = - Multiset{Union{Textual,Annotated{Textual}}} -ST.scitype(::AbstractDict{<:PlainNGram{N}}) where N = - Multiset{NTuple{N,Textual}} -ST.scitype(::AbstractDict{<:TaggedNGram{N}}) where N = - Multiset{NTuple{N,Annotated{Textual}}} -ST.scitype(::AbstractDict{<:PlainNGram}) = - Multiset{NTuple{<:Any,Textual}} -ST.scitype(::AbstractDict{<:TaggedNGram}) = - Multiset{NTuple{<:Any,Annotated{Textual}}} # Scitype for fast array broadcasting @@ -104,6 +76,4 @@ ST.Scitype(::Type{<:TimeType}, ::DefaultConvention) = ScientificTimeTy ST.Scitype(::Type{<:Date}, ::DefaultConvention) = ScientificDate ST.Scitype(::Type{<:Time}, ::DefaultConvention) = ScientificTime ST.Scitype(::Type{<:DateTime}, ::DefaultConvention) = ScientificDateTime -ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram -ST.Scitype(::Type{<:TaggedWord}, ::DefaultConvention) = - Annotated{Textual} +# ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram diff --git a/test/basic_tests.jl b/test/basic_tests.jl index 539b6c0..33b09d8 100644 --- a/test/basic_tests.jl +++ b/test/basic_tests.jl @@ -86,13 +86,13 @@ end @test scitype(gray_image) == GrayImage{10,20} end -@testset "PersistenceDiagrams" begin - diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0) - @test scitype(diagram) == PersistenceDiagram +# @testset "PersistenceDiagrams" begin +# diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0) +# @test scitype(diagram) == PersistenceDiagram - diagrams = [diagram, diagram, diagram] - @test scitype(diagrams) == Vec{PersistenceDiagram} -end +# diagrams = [diagram, diagram, diagram] +# @test scitype(diagrams) == Vec{PersistenceDiagram} +# end @testset "temporal types" begin d = Date(2020, 4, 21) diff --git a/test/runtests.jl b/test/runtests.jl index 9e89a2f..9be81f2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using Test, ScientificTypes, ScientificTypesBase, Random using Tables, CategoricalArrays, DataFrames -using ColorTypes, PersistenceDiagramsBase, CorpusLoaders +using ColorTypes, CorpusLoaders using Dates # using CSV # dropped until julia release new LTS as issue for 1.0 import Distributions diff --git a/test/scitypes.jl b/test/scitypes.jl index c1ca4ce..a2cd685 100644 --- a/test/scitypes.jl +++ b/test/scitypes.jl @@ -240,53 +240,6 @@ end @test scitype(FooSampleable()) == Sampleable{Count} end -@testset "text analysis" begin - tagged_word = CorpusLoaders.PosTaggedWord("NN", "wheelbarrow") - tagged_word2 = CorpusLoaders.PosTaggedWord("NN", "soil") - @test scitype(tagged_word) == Annotated{Textual} - bag_of_words = Dict("cat"=>1, "dog"=>3) - @test scitype(bag_of_words) == Multiset{Textual} - bag_of_tagged_words = Dict(tagged_word => 5) - @test scitype(bag_of_tagged_words) == Multiset{Annotated{Textual}} - @test scitype(Document("My Document", "kadsfkj")) == Unknown - @test scitype(Document([tagged_word, tagged_word2])) == - Annotated{AbstractVector{Annotated{Textual}}} - @test scitype(Document("My Other Doc", [tagged_word, tagged_word2])) == - Annotated{AbstractVector{Annotated{Textual}}} - nested_tokens = [["dog", "cat"], ["bird", "cat"]] - @test scitype(Document("Essay Number 1", nested_tokens)) == - Annotated{AbstractVector{AbstractVector{Textual}}} - - @test scitype(Dict(("cat", "in") => 3)) == Multiset{Tuple{Textual,Textual}} - bag_of_words = Dict("cat in" => 1, - "the hat" => 1, - "the" => 2, - "cat" => 1, - "hat" => 1, - "in the" => 1, - "in" => 1, - "the cat" => 1) - bag_of_ngrams = - Dict(Tuple(String.(split(k))) => v for (k, v) in bag_of_words) - # Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries: - # ("cat",) => 1 - # ("cat", "in") => 1 - # ("in",) => 1 - # ("the", "hat") => 1 - # ("the",) => 2 - # ("hat",) => 1 - # ("in", "the") => 1 - # ("the", "cat") => 1 - @test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Textual}} - - @test scitype(Dict((tagged_word, tagged_word2) => 3)) == - Multiset{Tuple{Annotated{Textual},Annotated{Textual}}} - bag_of_ngrams = Dict((tagged_word, tagged_word2) => 3, - (tagged_word,) => 7) - @test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Annotated{Textual}}} - -end - @testset "Autotype+tight" begin x = [1,2,3,missing]; x = x[1:3]