From a0a87defc402150b3aeb4543a3627e9d3ed2795d Mon Sep 17 00:00:00 2001
From: StevellM <muller@math.uni-sb.de>
Date: Tue, 10 Oct 2023 17:15:15 +0200
Subject: [PATCH] fix union + add sum/intersect and difference

---
 docs/src/features/mset.md      |  49 +++--
 src/Misc/MSet.jl               | 341 ++++++++++++++++++++++++++-------
 src/NumField/NfAbs/Elem.jl     |   2 +-
 src/NumField/NfAbs/PolyFact.jl |   2 +-
 test/Misc/MSet.jl              |  14 +-
 5 files changed, 318 insertions(+), 90 deletions(-)

diff --git a/docs/src/features/mset.md b/docs/src/features/mset.md
index 90ba63106e..3a3adc1588 100644
--- a/docs/src/features/mset.md
+++ b/docs/src/features/mset.md
@@ -27,21 +27,39 @@ multiset
 
 ### Functions
 
-Existing functions for any collection of objects which are currently available
+One can iterate over an `MSet` as over a regular `Set`. Moreover, here is a list
+of functions defined for collections of objects which are currently available
 for `MSet`:
 
-* `similar`
+* `==`
+* `all`
+* `any`
+* `copy`
+* `delete!`
+* `eltype`
+* `filter`
+* `filter!`
+* `in`
+* `intersect`
+* `intersect!`
 * `isempty`
+* `issubset`
 * `length`
-* `eltype`
+* `pop!`
 * `push!`
-* `copy`
-* `==`
+* `setdiff`
+* `setdiff!`
+* `similar`
 * `unique`
+* `union`
+* `union!`
+* ...
 
-One can also iterate over an `MSet`, and use `filter` for a given predicate on
-the keys of the underlying dictionary (not on their values!). One can also test
-containment.
+Note that `pop!` and `delete!` for `MSet` are available but have a different behaviour.
+For an element `x` in a multi-set `M::MSet`, `pop!(M, x)` will remove
+*one* instance of `x` in `M` - in particular `multiplicity(M, x)` will drop by
+$1$. Much stronger, `delete!(M, x)` will remove *all* instances of `x` in `M` and
+so `multiplicity(M, x)` will be $0$.
 
 While `unique` will return the keys of the underlying dictionary, one can access
 the values (i.e. the multiplicities of the elements in the multi-set) via the
@@ -52,16 +70,13 @@ multiplicities(::MSet)
 multiplicity(::MSet{T}, ::T) where T
 ```
 
-Note that `pop!` and `delete!` for `MSet` are available but have a different behaviour.
-For an element `x` in an multi-set `M <: MSet`, then `pop!(M, x)` will remove
-*one* instance of `x` in `M` - in particular `multiplicity(M, x)` will drop by
-$1$. Much stronger, `delete!(M, x)` will remove *all* instances of `x` in `M` and
-so `multiplicity(M, x)` will be $0$.
+Finally, the sum and difference for `MSet` are also available. Difference is
+given by the complements of sets and the sum is given by disjoint union of sets.
 
-Finally, one can take unions (`union`, `union!`) of an `MSet` with other
-finite collections of objects. Note that `union!` will require coercion of
-elements. Similarly, one can compare an `MSet` with another collection with the
-`setdiff/setdiff!` functions.
+```@docs
+sum(::MSet, ::MSet)
+Base.:(-)(::MSet, ::MSet...)
+```
 
 ## Sub-set iterators
 
diff --git a/src/Misc/MSet.jl b/src/Misc/MSet.jl
index bfb55e8bdd..9dbc9c94a8 100644
--- a/src/Misc/MSet.jl
+++ b/src/Misc/MSet.jl
@@ -4,6 +4,14 @@ export multiplicity
 export multiset
 export subsets
 
+###############################################################################
+#
+#  Multi-sets
+#
+###############################################################################
+
+### Type and constructors
+
 @doc raw"""
     MSet{T} <: AbstractSet{T}
 
@@ -26,7 +34,15 @@ mutable struct MSet{T} <: AbstractSet{T}
   dict::Dict{T,Int}
 
   MSet{T}() where {T} = new{T}(Dict{T,Int}())
-  MSet{T}(itr) where {T} = union!(new{T}(Dict{T,Int}()), itr)
+
+  function MSet{T}(itr) where {T}
+    # Start from an empty multi-set and insert the elements of `itr` one by
+    # one; every `push!` call here uses the default multiplicity of 1
+    mset = new{T}(Dict{T, Int}())
+    foreach(elt -> push!(mset, elt), itr)
+    return mset
+  end
+
   MSet{T}(d::Dict{T, Int}) where {T} = new{T}(d)
   MSet{T}(l::Vector{T}, m::Vector{Int}) where {T} = MSet{T}(Dict(zip(l, m)))
 end
@@ -90,7 +106,7 @@ MSet{String} with 14 elements:
   "a" : 4
 ```
 """
-multiset(iter) = MSet(iter)
+multiset(itr) = MSet(itr)
 
 function multiset(d::Dict{T, Int}) where {T}
   @req minimum(collect(values(d))) > 0 "The values of d must be positive integers"
@@ -147,6 +163,10 @@ MSet{QQFieldElem}()
 Base.similar(::MSet{T}) where {T} = MSet{T}()
 Base.similar(::MSet, T::Type) = MSet{T}()
 
+Base.copy(s::MSet) = union!(similar(s), s)  # fill an empty `MSet` of the same type
+
+### Show methods
+
 # We try to adopt the same conventions as in Oscar, so one-line printing should
 # stay in one line, and we do not give details about what is in the MSet: the
 # detailled printing will take care of it
@@ -190,9 +210,9 @@ function Base.show(io::IO, ::MIME"text/plain", s::MSet)
     print(io, Indent())
     d = s.dict
     un = collect(keys(d))
+    rmax = maximum(ndigits(k) for k in values(d))
+    offmax = szw - (rmax + 3)
     if length(un) <= szh
-      rmax = maximum(ndigits(k) for k in values(d))
-      offmax = szw - (rmax + 3)
       lmax = min(maximum(length(sprint(show, a)) for a in un), offmax)
       for k in un
         pk = sprint(show, k)
@@ -210,16 +230,21 @@ function Base.show(io::IO, ::MIME"text/plain", s::MSet)
         end
       end
     else
-      lmax = maximum(length(sprint(show, a)) for a in un[1:szh])
+      lmax = min(maximum(length(sprint(show, a)) for a in un[1:szh]), offmax)
       for i in 1:szh
         println(io)
         k = un[i]
+        pk = sprint(show, k)
         lk = length(sprint(show, k))
         v = d[k]
-        if v > 1
-          print(io, "$k", " "^(lmax-lk+1), ": $v")
+        if lk > offmax
+          print(io, pk[1:offmax-length(" \u2026")], " \u2026")
         else
-          print(io, "$k")
+          print(io, pk)
+        end
+        lk = min(offmax, lk)
+        if v > 1
+          print(io, " "^(lmax-lk+1), ": $v")
         end
       end
       println(io)
@@ -228,18 +253,42 @@ function Base.show(io::IO, ::MIME"text/plain", s::MSet)
   end
 end
 
+### Iteration
+
 Base.isempty(s::MSet) = isempty(s.dict)
 Base.length(s::MSet) = sum(values(s.dict))
 Base.IteratorSize(::Type{MSet}) = Base.HasLength()
 Base.IteratorEltype(::Type{MSet}) = Base.HasEltype()
 Base.eltype(::Type{MSet{T}}) where {T} = T
-Base.in(x, s::MSet) = haskey(s.dict, x)
+Base.in(x, s::MSet) = any(y -> x == y, keys(s.dict))  # linear scan using `==`
 
-function Base.push!(s::MSet, x, mult::Int=1)
-  add_to_key!(s.dict, x, mult)
+function Base.iterate(s::MSet)
+  next = iterate(s.dict)  # first `element => multiplicity` pair, if any
+  next === nothing && return nothing
+  return (first(next[1]), (next[1], next[2], 1))
+end
+
+function Base.iterate(s::MSet, state)
+  # `state` holds the current `element => multiplicity` pair, the state of the
+  # underlying dictionary iterator and how often the element was yielded so far
+  pair, dict_state, count = state
+  count < last(pair) && return (first(pair), (pair, dict_state, count + 1))
+  next = iterate(s.dict, dict_state)
+  next === nothing && return nothing
+  pair, dict_state = next
+  return (first(pair), (pair, dict_state, 1))
+end
+
+### MSets operations
+
+function Base.push!(s::MSet{T}, x, mult::Int=1) where {T}
+  # NOTE(review): returns the result of `add_to_key!`, not `s` - confirm intended
+  @req promote_type(T, typeof(x)) == T "Cannot coerce element"
+  return add_to_key!(s.dict, x isa T ? x : T(x), mult)
 end
 
 function Base.pop!(s::MSet{T}, x) where {T}
+  @req promote_type(T, typeof(x)) == T "Cannot coerce element"
   y = x isa T ? x : T(x)
   y in s || throw(KeyError(y))
   add_to_key!(s.dict, y, -1)
@@ -247,6 +296,7 @@ function Base.pop!(s::MSet{T}, x) where {T}
 end
 
 function Base.pop!(s::MSet{T}, x, default) where {T}
+  @req promote_type(T, typeof(x)) == T "Cannot coerce element"  # guards `T(x)` below
   y = x isa T ? x : T(x)
   return y in s ? pop!(s, y) : (default isa T ? default : T(default))
 end
@@ -254,53 +304,227 @@ end
 Base.pop!(s::MSet) = (val = iterate(s.dict)[1][1]; pop!(s, val))
 
 function Base.delete!(s::MSet{T}, x) where {T}
+  @req promote_type(T, typeof(x)) == T "Cannot coerce element"  # guards `T(x)` below
   y = x isa T ? x : T(x)
   delete!(s.dict, y)
   return s
 end
 
-Base.copy(s::MSet) = union!(similar(s), s)
+Base.setdiff(s::MSet, itrs...) = setdiff!(copy(s), itrs...)  # mutate a copy, not `s`
 
-==(s1::MSet, s2::MSet) = s1.dict == s2.dict
+function Base.setdiff!(s::MSet, itrs...)
+  for itr in itrs  # remove the elements of each collection in turn
+    setdiff!(s, itr)
+  end
+  return s
+end
 
-function Base.iterate(s::MSet)
-  I = iterate(s.dict)
-  I === nothing && return I
-  return I[1][1], (I[1], I[2], 1)
+function Base.setdiff!(s::MSet, itr)
   for x in itr
+    pop!(s, x, x)  # the default makes `pop!` not throw when `x` is not in `s`
   end
+  return s
 end
 
-function Base.iterate(s::MSet, state)
-  if state[3] < state[1][2]
-    return state[1][1], (state[1], state[2], state[3]+1)
-  else
-    I = iterate(s.dict, state[2])
-    I === nothing && return I
-    val, st = I
-    return (val[1], (val, st, 1))
+@doc raw"""
+    Base.:(-)(s::MSet, itrs::MSet...) -> MSet
+
+Return the multi-set associated to the complement in `s` of the collections
+in `itrs`.
+
+Alias for `setdiff(s, itrs...)`.
+
+# Examples
+```jldoctest
+julia> m = multiset("A very nice sentence")
+MSet{Char} with 20 elements:
+  'n' : 3
+  'e' : 5
+  'A'
+  'y'
+  'i'
+  'r'
+  's'
+  't'
+  ' ' : 3
+  'c' : 2
+  'v'
+
+julia> n = multiset("A nice sentence")
+MSet{Char} with 15 elements:
+  'n' : 3
+  'A'
+  'c' : 2
+  'i'
+  'e' : 4
+  's'
+  't'
+  ' ' : 2
+
+julia> n-m
+MSet{Char}()
+
+julia> m-n
+MSet{Char} with 5 elements:
+  'e'
+  'y'
+  'r'
+  ' '
+  'v'
+```
+"""
+Base.:(-)(s::MSet, itrs::MSet...) = setdiff(s, itrs...)  # non-mutating, see `setdiff`
+
+function Base.unique(s::MSet)
+  return collect(keys(s.dict))  # keys of the underlying dictionary, as a vector
+end
+
+function Base.issubset(s1::MSet{T}, s2::MSet{U}) where {T, U}
+  @req promote_type(T, U) == U "Cannot compare multi-sets"
+  !issubset(U[convert(U, x) for x in keys(s1.dict)], unique(s2)) && return false
+  for (x, m) in s1.dict  # walk `s1`: its elements coerce to `U`, not conversely
+    m > multiplicity(s2, x) && return false
+  end
+  return true
+end
+
+@doc raw"""
+    Base.sum(s::MSet, itrs::MSet...) -> MSet
+    Base.:(+)(s::MSet, itrs::MSet...) -> MSet
+
+Return the multi-set associated to the disjoint union of `s` and the
+collections of objects in `itrs`.
+
+# Examples
+```jldoctest
+julia> m = multiset("A nice sentence")
+MSet{Char} with 15 elements:
+  'n' : 3
+  'A'
+  'c' : 2
+  'i'
+  'e' : 4
+  's'
+  't'
+  ' ' : 2
+
+julia> n = multiset("A very nice sentence")
+MSet{Char} with 20 elements:
+  'n' : 3
+  'e' : 5
+  'A'
+  'y'
+  'i'
+  'r'
+  's'
+  't'
+  ' ' : 3
+  'c' : 2
+  'v'
+
+julia> m + n
+MSet{Char} with 35 elements:
+  'n' : 6
+  'e' : 9
+  'A' : 2
+  's' : 2
+  'i' : 2
+  't' : 2
+  'y'
+  'r'
+  ' ' : 5
+  'c' : 4
+  'v'
+```
+"""
+function Base.sum(s1::MSet, s2::MSet)
+  T = Base.promote_eltype(s1, s2)
+  s = similar(s1, T)
+  # Walk both dictionaries: `multiplicity(s1, x)` throws for `x` of a wider type
+  for t in (s1, s2), (x, m) in t.dict
+    y = convert(T, x)
+    s.dict[y] = get(s.dict, y, 0) + m
   end
+  return s
+end
+
+function Base.sum(s::MSet, itrs::MSet...)
+  isempty(itrs) && return copy(s)  # `sum(s)` and unary `+s`: nothing to add
+  return foldl(sum, itrs; init = s)
+end
 
+Base.:(+)(s::MSet, itrs::MSet...) = sum(s, itrs...)  # operator form of `sum`
+
 Base.union(s::MSet) = copy(s)
 
-function Base.union(s::MSet, sets...)
-  T = Base.promote_eltype(s, sets...)
-  u = MSet{T}()
-  union!(u, s)
-  for t in sets
-    union!(u, t)
+function Base.union(s1::MSet, s2::MSet)
+  T = Base.promote_eltype(s1, s2)
+  s = similar(s1, T)
+  # Walk both dictionaries: `multiplicity(s1, x)` throws for `x` of a wider type
+  for t in (s1, s2), (x, m) in t.dict
+    y = convert(T, x)
+    s.dict[y] = max(get(s.dict, y, 0), m)
+  end
+  return s
+end
+
+function Base.union(s::MSet, itrs...)
+  # Pairwise unions from left to right; each collection becomes a multi-set first
+  return foldl((acc, itr) -> union(acc, multiset(itr)), itrs; init = s)
+end
+
+function Base.union!(s1::MSet{T}, s2::MSet{U}) where {T, U}
+  @req promote_type(T, U) == T "Cannot coerce elements"
+  d = s1.dict
+  # Walk `s2` itself: its elements coerce to `T`, but not conversely
+  for (x, m) in s2.dict
+    d[convert(T, x)] = max(multiplicity(s1, x), m)
   end
-  return u
+  return s1
+end
+
+function Base.union!(s::MSet, itrs...)
+  foreach(itr -> union!(s, multiset(itr)), itrs)  # no-op when `itrs` is empty
+  return s
 end
 
-function Base.union!(s::MSet, xs)
-  T = eltype(s)
-  @req promote_type(T, eltype(xs)) == T "Cannot coerce elements"
-  for x in xs
-    push!(s, convert(T, x))
+function Base.intersect(s1::MSet, s2::MSet)
+  val = unique(s1)
+  filter!(x -> any(y -> x == y, keys(s2.dict)), val)  # keep elements also in `s2`
+  T = promote_type(eltype(s1), typeof.(val)...)
+  s = similar(s1, T)
+  d = s.dict
+  for x in val
+    d[x] = min(multiplicity(s1, x), multiplicity(s2, x))  # smaller multiplicity
   end
   return s
 end
 
+function Base.intersect(s::MSet, itrs...)
+  # Fold instead of recursing: recursion would end on a call with empty `itrs`
+  return foldl((acc, itr) -> intersect(acc, multiset(itr)), itrs; init = copy(s))
+end
+
+function Base.intersect!(s1::MSet{T}, s2::MSet) where {T}
+  val = unique(s1)
+  filter!(x -> any(y -> x == y, keys(s2.dict)), val)  # keep elements also in `s2`
+  @req promote_type(T, typeof.(val)...) == T "Cannot coerce elements"
+  d = s1.dict
+  for x in unique(s1)  # iterate over a collected copy of the keys while mutating `s1`
+    if !(x in val)
+      delete!(s1, x)
+    else
+      d[x] = min(multiplicity(s1, x), multiplicity(s2, T(x)))  # smaller multiplicity
+    end
+  end
+  return s1
+end
+
+function Base.intersect!(s::MSet, itrs...)
+  foreach(itr -> intersect!(s, multiset(itr)), itrs)  # no-op when `itrs` is empty
+  return s
+end
+
 function Base.filter(pred, s::MSet)
   t = similar(s)
   for (x, m) in s.dict
@@ -380,7 +604,8 @@ julia> multiplicity(m, 6)
 0
 ```
 """
-function multiplicity(s::MSet{T}, x::T) where {T}
+function multiplicity(s::MSet{T}, x) where {T}
+  @req promote_type(T, typeof(x)) == T "Cannot coerce element"
   y = x isa T ? x : T(x)
   if haskey(s.dict, y)
     return s.dict[y]
@@ -389,29 +614,13 @@ function multiplicity(s::MSet{T}, x::T) where {T}
   end
 end
 
-function Base.unique(s::MSet)
-  return collect(keys(s.dict))
-end
-
-Base.setdiff(s::MSet, itrs...) = setdiff!(copy(s), itrs...)
-
-function Base.setdiff!(s::MSet, itrs...)
-  for x in itr
-    setdiff!(s, itr)
-  end
-  return s
-end
-
-function Base.setdiff!(s::MSet, itr)
-  for x in itr
-    pop!(s, x)
-  end
-  return s
-end
+###############################################################################
+#
+#  Sub-set iterators
+#
+###############################################################################
 
-############################################
-# subsets iterator
-############################################
+### Sub-multi-sets
 
 struct MSubSetItr{T}
   b::Vector{T}
@@ -430,12 +639,6 @@ function subsets(s::MSet{T}) where T
   # subset (bi, ni) -> sum ni gi where gi = prod (mj+1)
   b = unique(s)
   m = Int[multiplicity(s, x) for x in b]
-  #= not needed for the iterator
-  g = [1]
-  for i=2:length(b)
-    push!(g, g[end]*(m[i]+1))
-  end
-  =#
   return MSubSetItr{T}(b, m, length(m) == 0 ? 1 : prod(x+1 for x in m))
 end
 
@@ -485,7 +688,8 @@ end
 
 #... to be completed from base/Set.jl ...
 
-#subsets for Set
+### Arbitrary sub-sets
+
 struct SubSetItr{T}
   b::Vector{T}
   length::Int
@@ -547,7 +751,8 @@ function Base.show(io::IO, ::MIME"text/plain", M::SubSetItr)
   print(io, Dedent(), "of length ", M.length)
 end
 
-#only subsets of a given size
+### Sub-sets of a given size
+
 struct SubSetSizeItr{T}
   b::Vector{T}
   k::Int #subsets of size k only
diff --git a/src/NumField/NfAbs/Elem.jl b/src/NumField/NfAbs/Elem.jl
index ffbb961e5b..a4c5a8a469 100644
--- a/src/NumField/NfAbs/Elem.jl
+++ b/src/NumField/NfAbs/Elem.jl
@@ -462,7 +462,7 @@ function _ds(fa)
   @assert all(x->x == 1, values(fa.fac))
   T = Int[degree(x) for x = keys(fa.fac)]
   M = MSet(T)
-  return Set(sum(s) for s = subsets(M) if length(s) > 0)
+  return Set(sum(collect(s)) for s = subsets(M) if length(s) > 0)
 end
 
 function _degset(f::ZZPolyRingElem, p::Int)
diff --git a/src/NumField/NfAbs/PolyFact.jl b/src/NumField/NfAbs/PolyFact.jl
index 2f59be21ef..dc9600182a 100644
--- a/src/NumField/NfAbs/PolyFact.jl
+++ b/src/NumField/NfAbs/PolyFact.jl
@@ -318,7 +318,7 @@ function degree_set(fa::Dict{Int, Int})
     ind += v
   end
   M = MSet(T)
-  return Set(sum(s) for s = subsets(M) if length(s) > 0)
+  return Set(sum(collect(s)) for s = subsets(M) if length(s) > 0)
 end
 
 @doc raw"""
diff --git a/test/Misc/MSet.jl b/test/Misc/MSet.jl
index d9c1f22758..890bd254e6 100644
--- a/test/Misc/MSet.jl
+++ b/test/Misc/MSet.jl
@@ -10,8 +10,8 @@
   @test length(String(take!(io))) == 39
 
   M = MSet(root_lattice(:A, i) for j in 1:10 for i in 1:100)
-  show(io, MIME"text/plain"(), m)
-  @test length(String(take!(io))) == 983
+  show(io, MIME"text/plain"(), M)
+  @test length(String(take!(io))) == 945
 
   m = @inferred multiset(Int[x^3%8 for x = 1:50])
   @test !isempty(m)
@@ -37,12 +37,16 @@
 
   m = @inferred multiset(Dict("a" => 4, "b" => 1, "c" => 9))
   lis = @inferred collect(m)
+  @test length(m) == length(lis)
 
-  m2 = @inferred union(m, lis)
+  m2 = @inferred m + m
   for i in m
     @test multiplicity(m2, i) == 2*multiplicity(m, i)
   end
 
+  m3 = @inferred m-m
+  @test length(m3) == 0
+
   @test union(m) == m
   @test length(filter(x -> multiplicity(m, x) != 1, m)) == length(m) - 1
 
@@ -68,6 +72,10 @@ end
   @test length(String(take!(io))) == 35
   @test eltype(M) == typeof(m)
   @test length(collect(M)) == length(M)
+
+  n = collect(M)[end]
+  @test union(m, n) == m
+  @test intersect(m, n) == n
 end
 
 @testset "Sub-set iterators" begin