From 21d0b845ab2c1099145e3ea00f6db477eaceeabf Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 11 Oct 2021 14:41:31 +1300 Subject: [PATCH 1/5] bump 2.3.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 45918b7..75e855f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ScientificTypes" uuid = "321657f4-b219-11e9-178b-2701a2544e81" authors = ["Anthony D. Blaom "] -version = "2.3.1" +version = "2.3.2" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From 068eb766f9cb9a107c68f0de45e3d7a5f7746eb5 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 11 Oct 2021 14:45:24 +1300 Subject: [PATCH 2/5] rm CorpusLoaders as dep --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 75e855f..1ab7dbc 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,6 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] CategoricalArrays = "0.8, 0.9, 0.10" ColorTypes = "0.9, 0.10, 0.11" -CorpusLoaders = "0.3.2" Distributions = "0.25.1" PersistenceDiagramsBase = "0.1" PrettyTables = "1" From 11f06ba6e43188d8296880e1d8a3a01e2a8c6d23 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 11 Oct 2021 15:23:38 +1300 Subject: [PATCH 3/5] rm scitype() overloading for text-analysis and rm CorpusLoaders dep --- src/convention/scitype.jl | 30 ------------------------- test/scitypes.jl | 47 --------------------------------------- 2 files changed, 77 deletions(-) diff --git a/src/convention/scitype.jl b/src/convention/scitype.jl index 8fc218a..76c698f 100644 --- a/src/convention/scitype.jl +++ b/src/convention/scitype.jl @@ -66,34 +66,6 @@ ST.scitype(::Distributions.Sampleable{F,S}) where {F,S} = ST.scitype(::Distributions.Distribution{F,S}) where {F,S} = Density{space_scitype(F,S)} -# Text analysis - EXPERIMENTAL - -# This would be less of a hack if some of #155 were adopted. - -type2scitype(T::Type) = ST.Scitype(T, DefaultConvention()) -type2scitype(::Type{<:AbstractVector{T}}) where T = - AbstractVector{type2scitype(T)} -type2scitype(::NTuple{N,T}) where {N,T} = NTuple{type2scitype{T}} -const PlainNGram{N} = NTuple{N,<:AbstractString} -const TaggedNGram{N} = NTuple{N,<:CorpusLoaders.TaggedWord} -ST.scitype(::TaggedWord, ::DefaultConvention) = Annotated{Textual} -ST.scitype(::Document{<:AbstractVector{T}}, ::DefaultConvention) where T = - Annotated{AbstractVector{type2scitype(T)}} -ST.scitype(::AbstractDict{<:AbstractString,<:Integer}, - ::DefaultConvention) = Multiset{Textual} -ST.scitype(::AbstractDict{<:TaggedWord,<:Integer}, - ::DefaultConvention) = Multiset{Annotated{Textual}} -ST.scitype(::AbstractDict{<:Union{TaggedWord,AbstractString},<:Integer}, - ::DefaultConvention) = - Multiset{Union{Textual,Annotated{Textual}}} -ST.scitype(::AbstractDict{<:PlainNGram{N}}) where N = - Multiset{NTuple{N,Textual}} -ST.scitype(::AbstractDict{<:TaggedNGram{N}}) where N = - Multiset{NTuple{N,Annotated{Textual}}} -ST.scitype(::AbstractDict{<:PlainNGram}) = - Multiset{NTuple{<:Any,Textual}} -ST.scitype(::AbstractDict{<:TaggedNGram}) = - Multiset{NTuple{<:Any,Annotated{Textual}}} # Scitype for fast array broadcasting @@ -105,5 +77,3 @@ ST.Scitype(::Type{<:Date}, ::DefaultConvention) = ScientificDate ST.Scitype(::Type{<:Time}, ::DefaultConvention) = ScientificTime ST.Scitype(::Type{<:DateTime}, ::DefaultConvention) = ScientificDateTime ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram -ST.Scitype(::Type{<:TaggedWord}, ::DefaultConvention) = - Annotated{Textual} diff --git a/test/scitypes.jl b/test/scitypes.jl index c1ca4ce..a2cd685 100644 --- a/test/scitypes.jl +++ b/test/scitypes.jl @@ -240,53 +240,6 @@ end @test scitype(FooSampleable()) == Sampleable{Count} end -@testset "text analysis" begin - tagged_word = CorpusLoaders.PosTaggedWord("NN", "wheelbarrow") - tagged_word2 = CorpusLoaders.PosTaggedWord("NN", "soil") - @test scitype(tagged_word) == Annotated{Textual} - bag_of_words = Dict("cat"=>1, "dog"=>3) - @test scitype(bag_of_words) == Multiset{Textual} - bag_of_tagged_words = Dict(tagged_word => 5) - @test scitype(bag_of_tagged_words) == Multiset{Annotated{Textual}} - @test scitype(Document("My Document", "kadsfkj")) == Unknown - @test scitype(Document([tagged_word, tagged_word2])) == - Annotated{AbstractVector{Annotated{Textual}}} - @test scitype(Document("My Other Doc", [tagged_word, tagged_word2])) == - Annotated{AbstractVector{Annotated{Textual}}} - nested_tokens = [["dog", "cat"], ["bird", "cat"]] - @test scitype(Document("Essay Number 1", nested_tokens)) == - Annotated{AbstractVector{AbstractVector{Textual}}} - - @test scitype(Dict(("cat", "in") => 3)) == Multiset{Tuple{Textual,Textual}} - bag_of_words = Dict("cat in" => 1, - "the hat" => 1, - "the" => 2, - "cat" => 1, - "hat" => 1, - "in the" => 1, - "in" => 1, - "the cat" => 1) - bag_of_ngrams = - Dict(Tuple(String.(split(k))) => v for (k, v) in bag_of_words) - # Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries: - # ("cat",) => 1 - # ("cat", "in") => 1 - # ("in",) => 1 - # ("the", "hat") => 1 - # ("the",) => 2 - # ("hat",) => 1 - # ("in", "the") => 1 - # ("the", "cat") => 1 - @test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Textual}} - - @test scitype(Dict((tagged_word, tagged_word2) => 3)) == - Multiset{Tuple{Annotated{Textual},Annotated{Textual}}} - bag_of_ngrams = Dict((tagged_word, tagged_word2) => 3, - (tagged_word,) => 7) - @test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Annotated{Textual}}} - -end - @testset "Autotype+tight" begin x = [1,2,3,missing]; x = x[1:3] From f7c4bfe579494bdb3049d09ea727fae2719adc64 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 11 Oct 2021 15:50:47 +1300 Subject: [PATCH 4/5] rm persistence diagram overloading of scitype --- Project.toml | 2 -- src/convention/scitype.jl | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 1ab7dbc..f1d3579 100644 --- a/Project.toml +++ b/Project.toml @@ -9,7 +9,6 @@ ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -PersistenceDiagramsBase = "b1ad91c1-539c-4ace-90bd-ea06abc420fa" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161" @@ -20,7 +19,6 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" CategoricalArrays = "0.8, 0.9, 0.10" ColorTypes = "0.9, 0.10, 0.11" Distributions = "0.25.1" -PersistenceDiagramsBase = "0.1" PrettyTables = "1" Reexport = "1.2" ScientificTypesBase = "2.2" diff --git a/src/convention/scitype.jl b/src/convention/scitype.jl index 76c698f..401ddc3 100644 --- a/src/convention/scitype.jl +++ b/src/convention/scitype.jl @@ -16,7 +16,7 @@ ColorImage{size(img)...} # Persistence diagrams -ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram +# ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram # CategoricalArray scitype @@ -76,4 +76,4 @@ ST.Scitype(::Type{<:TimeType}, ::DefaultConvention) = ScientificTimeTy ST.Scitype(::Type{<:Date}, ::DefaultConvention) = ScientificDate ST.Scitype(::Type{<:Time}, ::DefaultConvention) = ScientificTime ST.Scitype(::Type{<:DateTime}, ::DefaultConvention) = ScientificDateTime -ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram +# ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram From 41366c1e5efdca2d81f15a5f9b79a7d71eb2bae3 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 11 Oct 2021 16:02:25 +1300 Subject: [PATCH 5/5] rm persistent diagram scitype() overloadings and rm dependency --- src/ScientificTypes.jl | 1 - test/basic_tests.jl | 12 ++++++------ test/runtests.jl | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/ScientificTypes.jl b/src/ScientificTypes.jl index 052f0fe..9c0e730 100644 --- a/src/ScientificTypes.jl +++ b/src/ScientificTypes.jl @@ -6,7 +6,6 @@ using Reexport using Tables using CategoricalArrays using ColorTypes -using PersistenceDiagramsBase using CorpusLoaders using PrettyTables using Dates diff --git a/test/basic_tests.jl b/test/basic_tests.jl index 539b6c0..33b09d8 100644 --- a/test/basic_tests.jl +++ b/test/basic_tests.jl @@ -86,13 +86,13 @@ end @test scitype(gray_image) == GrayImage{10,20} end -@testset "PersistenceDiagrams" begin - diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0) - @test scitype(diagram) == PersistenceDiagram +# @testset "PersistenceDiagrams" begin +# diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0) +# @test scitype(diagram) == PersistenceDiagram - diagrams = [diagram, diagram, diagram] - @test scitype(diagrams) == Vec{PersistenceDiagram} -end +# diagrams = [diagram, diagram, diagram] +# @test scitype(diagrams) == Vec{PersistenceDiagram} +# end @testset "temporal types" begin d = Date(2020, 4, 21) diff --git a/test/runtests.jl b/test/runtests.jl index 9e89a2f..9be81f2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using Test, ScientificTypes, ScientificTypesBase, Random using Tables, CategoricalArrays, DataFrames -using ColorTypes, PersistenceDiagramsBase, CorpusLoaders +using ColorTypes, CorpusLoaders using Dates # using CSV # dropped until julia release new LTS as issue for 1.0 import Distributions