Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Connect our serialization with Julia serialization #2837

Merged
merged 7 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version = "0.13.1-DEV"
[deps]
AbstractAlgebra = "c3fe647b-3220-5bb0-a1ea-a7954cac585d"
AlgebraicSolving = "66b61cbe-0446-4d5d-9090-1ff510639f9d"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
GAP = "c863536a-3901-11e9-33e7-d5cd0df7b904"
Hecke = "3e1990a7-5d81-5526-99ce-9ba3ff248f21"
Expand All @@ -18,6 +19,7 @@ Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RandomExtensions = "fb686558-2515-59ef-acaa-46db3789a887"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Singular = "bcd08a7b-43d2-5ff7-b6d4-c458787f915c"
TOPCOM_jll = "36f60fef-b880-50dc-9289-4aaecee93cc3"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
Expand Down
197 changes: 111 additions & 86 deletions src/Serialization/main.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,70 +162,6 @@
# parameters of type should not matter here
const reverse_type_map = Dict{String, Type}()

# Associate the encoded name `str` with the type `T` in `reverse_type_map`.
# A name may be registered repeatedly for the same type, but reusing it for a
# different type is an error.
function register_serialization_type(@nospecialize(T::Type), str::String)
  # falls back to `T` itself when `str` has not been registered yet
  registered = get(reverse_type_map, str, T)
  registered == T || error("encoded type $str already registered for a different type: $T versus $(reverse_type_map[str])")
  return reverse_type_map[str] = T
end

# @register_serialization_type NewType "String Representation of type" uses_id uses_params

# register_serialization_type is a macro to ensure that the string we generate
# matches exactly the expression passed as first argument, and does not change
# in unexpected ways when import/export statements are adjusted.
# The last three arguments are optional and can appear in any order. Passing a string
# argument overrides how the type is stored as a string. The other two are boolean
# flags. When uses_id is set, the object is stored as a reference and is
# referred to throughout the serialization by a UUID. This should typically only
# be used for types that do not have a fixed normal form, for example PolyRing and MPolyRing.
# The uses_params flag serializes the object with a more structured type
# description, which makes the serialization more efficient; see the discussion on
# save_type_params / load_type_params below.
# Build the (escaped) code that the `@register_serialization_type` macro
# expands to for the type expression `ex`:
#   * registers `ex` under the encoded name `str`,
#   * defines `encode_type` for `ex` and its subtypes,
#   * defines the `serialize_with_id` / `serialize_with_params` traits from the
#     two boolean flags.
function register_serialization_type(ex::Any, str::String, uses_id::Bool, uses_params::Bool)
  generated = quote
    register_serialization_type($ex, $str)
    encode_type(::Type{<:$ex}) = $str

    # Why ids exist: for some types equality cannot be decided from the
    # serialized data alone, so an id is stored and equality is enforced on
    # load. Only parent types need this; element types can be compared once
    # their parents are known to be equal.
    # Example: two serializations of QQ[x] are isomorphic rings, yet they may
    # have to be kept apart. Other software might not store the symbols of
    # QQ[x], and ids still allow QQ[x] and QQ[y] to be told apart. In general,
    # whenever some software system may be unable to distinguish the objects
    # on a syntactic level, ids are used.
    # ZZ, QQ and ZZ/nZZ need no ids, as their encodings are unambiguous.
    serialize_with_id(obj::T) where T <: $ex = $uses_id
    serialize_with_id(T::Type{<:$ex}) = $uses_id
    serialize_with_params(T::Type{<:$ex}) = $uses_params
  end
  return esc(generated)
end

# Usage: @register_serialization_type NewType ["encoded name"] [uses_id] [uses_params]
# The optional arguments may be given in any order; anything that is neither a
# string nor one of the two flag symbols is ignored.
macro register_serialization_type(ex::Any, args...)
  # flags are bare symbols and may appear anywhere among the optional arguments
  uses_id = any(arg -> arg == :uses_id, args)
  uses_params = any(arg -> arg == :uses_params, args)

  # an explicit string overrides the default encoding; the last one given wins
  idx = findlast(arg -> arg isa String, args)
  str = idx === nothing ? string(ex) : args[idx]

  return register_serialization_type(ex, str, uses_id, uses_params)
end

# Fallback method: reached only for types that have no registered
# `encode_type` method of their own, and always raises an error.
encode_type(::Type{T}) where {T} = error("unsupported type '$T' for encoding")
Expand Down Expand Up @@ -390,6 +326,90 @@
return loaded_parents
end

################################################################################
# Type Registration
# Associate the encoded name `str` with the type `T` in `reverse_type_map`.
# Registering the same name again for the same type is allowed; registering it
# for a different type raises an error. Returns `T`.
function register_serialization_type(@nospecialize(T::Type), str::String)
  if haskey(reverse_type_map, str) && reverse_type_map[str] != T
    error("encoded type $str already registered for a different type: $T versus $(reverse_type_map[str])")
  end
  reverse_type_map[str] = T
end

# @register_serialization_type NewType "String Representation of type" uses_id uses_params

# register_serialization_type is a macro to ensure that the string we generate
# matches exactly the expression passed as first argument, and does not change
# in unexpected ways when import/export statements are adjusted.
# The last three arguments are optional and can appear in any order. Passing a string
# argument overrides how the type is stored as a string. The other two are boolean
# flags. When uses_id is set, the object is stored as a reference and is
# referred to throughout the serialization by a UUID. This should typically only
# be used for types that do not have a fixed normal form, for example PolyRing and MPolyRing.
# The uses_params flag serializes the object with a more structured type
# description, which makes the serialization more efficient; see the discussion on
# save_type_params / load_type_params below.

import Serialization.serialize
import Serialization.deserialize
import Serialization.serialize_type
import Distributed.AbstractSerializer

# Build the (escaped) code that the `@register_serialization_type` macro
# expands to for the type expression `ex`:
#   * registers `ex` under the encoded name `str`,
#   * defines `encode_type` for `ex` and its subtypes,
#   * defines the `serialize_with_id` / `serialize_with_params` traits from the
#     two boolean flags,
#   * for types outside the listed standard Julia types, adds `serialize` /
#     `deserialize` methods that go through `save` / `load` with `IPCSerializer`.
function register_serialization_type(ex::Any, str::String, uses_id::Bool, uses_params::Bool)
  generated = quote
    register_serialization_type($ex, $str)
    encode_type(::Type{<:$ex}) = $str

    # Why ids exist: for some types equality cannot be decided from the
    # serialized data alone, so an id is stored and equality is enforced on
    # load. Only parent types need this; element types can be compared once
    # their parents are known to be equal.
    # Example: two serializations of QQ[x] are isomorphic rings, yet they may
    # have to be kept apart. Other software might not store the symbols of
    # QQ[x], and ids still allow QQ[x] and QQ[y] to be told apart. In general,
    # whenever some software system may be unable to distinguish the objects
    # on a syntactic level, ids are used.
    # ZZ, QQ and ZZ/nZZ need no ids, as their encodings are unambiguous.
    serialize_with_id(obj::T) where T <: $ex = $uses_id
    serialize_with_id(T::Type{<:$ex}) = $uses_id
    serialize_with_params(T::Type{<:$ex}) = $uses_params

    # standard Julia types keep their existing `serialize` methods
    if !($ex <: Union{Number, String, Bool, Symbol, Vector, Tuple, Matrix, NamedTuple})
      function serialize(s::AbstractSerializer, obj::T) where T <: $ex
        serialize_type(s, T)
        save(s.io, obj; serializer_type=IPCSerializer)
      end
      function deserialize(s::AbstractSerializer, ::Type{<:$ex})
        load(s.io; serializer_type=IPCSerializer)
      end
    end
  end
  return esc(generated)
end

# Usage: @register_serialization_type NewType ["encoded name"] [uses_id] [uses_params]
# The optional arguments may be given in any order; anything that is neither a
# string nor one of the two flag symbols is ignored.
macro register_serialization_type(ex::Any, args...)
  # flags are bare symbols and may appear anywhere among the optional arguments
  uses_id = any(arg -> arg == :uses_id, args)
  uses_params = any(arg -> arg == :uses_params, args)

  # an explicit string overrides the default encoding; the last one given wins
  idx = findlast(arg -> arg isa String, args)
  str = idx === nothing ? string(ex) : args[idx]

  return register_serialization_type(ex, str, uses_id, uses_params)
end

################################################################################
# Include serialization implementations for various types

Expand Down Expand Up @@ -449,39 +469,43 @@
42
```
"""
function save(io::IO, obj::T; metadata::Union{MetaData, Nothing}=nothing,
              serializer_type::Type{<: OscarSerializer} = JSONSerializer) where T
  # `serializer_type` selects the serializer wrapper; everything below writes
  # through the state object that wrapper carries.
  s = state(serializer_open(io, serializer_type))
  save_data_dict(s) do
    # write out the namespace first
    save_header(s, oscar_serialization_version, :_ns)

    save_typed_object(s, obj)

    if serialize_with_id(T)
      # reuse the id already assigned to `obj`, or create and record a new one
      ref = get(global_serializer_state.obj_to_id, obj, nothing)
      if isnothing(ref)
        ref = global_serializer_state.obj_to_id[obj] = uuid4()
        global_serializer_state.id_to_obj[ref] = obj
      end
      save_object(s, string(ref), :id)
    end

    # this should be handled by serializers in a later commit / PR
    # referenced objects are only written out for the JSON serializer
    if !isempty(s.refs) && serializer_type == JSONSerializer
      save_data_dict(s, refs_key) do
        for id in s.refs
          ref_obj = global_serializer_state.id_to_obj[id]
          s.key = Symbol(id)
          save_data_dict(s) do
            save_typed_object(s, ref_obj)
          end
        end
      end
    end

    if !isnothing(metadata)
      save_json(s, json(metadata), :meta)
    end
  end
  serializer_close(s)
  return nothing
end

Expand Down Expand Up @@ -550,8 +574,9 @@
true
```
"""
function load(io::IO; params::Any = nothing, type::Any = nothing)
state = deserializer_open(io)
function load(io::IO; params::Any = nothing, type::Any = nothing,
serializer_type=JSONSerializer)
s = state(deserializer_open(io, serializer_type))

# this should be moved to the serializer at some point
jsondict = JSON.parse(io, dicttype=Dict{Symbol, Any})
Expand Down Expand Up @@ -581,7 +606,7 @@

# add refs to state for referencing during recursion
if haskey(jsondict, refs_key)
merge!(state.refs, jsondict[refs_key])
merge!(s.refs, jsondict[refs_key])
end

if type !== nothing
Expand All @@ -596,22 +621,22 @@

if serialize_with_params(type)
if isnothing(params)
params = load_type_params(state, type, jsondict[type_key][:params])
params = load_type_params(s, type, jsondict[type_key][:params])
end
loaded = load_object(state, type, jsondict[:data], params)

loaded = load_object(s, type, jsondict[:data], params)
else
Base.issingletontype(type) && return type()
loaded = load_object(state, type, jsondict[:data])
loaded = load_object(s, type, jsondict[:data])
end
else
loaded = load_typed_object(state, jsondict; override_params=params)
loaded = load_typed_object(s, jsondict; override_params=params)
end

if haskey(jsondict, :id)
global_serializer_state.obj_to_id[loaded] = UUID(jsondict[:id])
global_serializer_state.id_to_obj[UUID(jsondict[:id])] = loaded
end

return loaded
end

Expand Down
61 changes: 38 additions & 23 deletions src/Serialization/serializers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,44 @@ function SerializerState(io::IO)
return SerializerState(true, UUID[], io, nothing)
end

# State carried while loading: the table of referenced objects found in the
# file, keyed by their id.
struct DeserializerState
  # or perhaps Dict{Int,Any} to be resilient against corrupt/malicious files using huge ids
  # the values of refs are objects to be deserialized
  refs::Dict{Symbol, Dict}
end

# Create a state with an empty reference table. The dictionary is built with
# the declared field type directly, so no conversion from `Dict{Symbol, Any}`
# is needed.
function DeserializerState()
  return DeserializerState(Dict{Symbol, Dict}())
end

################################################################################
# Serializers
# Common supertype of all serializers. The accessor `state(::OscarSerializer)`
# reads the `state` field, so every concrete subtype needs such a field.
abstract type OscarSerializer end

# Serializer used by default in `save` and `load`; wraps either a writing
# or a reading state.
struct JSONSerializer <: OscarSerializer
  state::Union{SerializerState, DeserializerState}
end

# Serializer passed to `save` / `load` by the `serialize` / `deserialize`
# methods generated at type registration; wraps either a writing or a
# reading state.
struct IPCSerializer <: OscarSerializer
  state::Union{SerializerState, DeserializerState}
end

# Return the state object wrapped by the serializer `s`.
function state(s::OscarSerializer)
  return s.state
end

# Create a serializer of type `T` wrapping a fresh writing state for `io`.
function serializer_open(io::IO, T::Type{<: OscarSerializer})
  # some level of handling should be done here at a later date
  writer_state = SerializerState(io)
  return T(writer_state)
end

# Finish writing with the state `s`; simply forwards to `finish_writing`.
serializer_close(s::SerializerState) = finish_writing(s)

# Create a serializer of type `T` wrapping a fresh reading state.
# `io` is accepted but not used yet.
function deserializer_open(io::IO, T::Type{<: OscarSerializer})
  # should eventually take io
  reader_state = DeserializerState()
  return T(reader_state)
end

function serialize_dict(f::Function, s::SerializerState)
begin_dict_node(s)
f()
Expand Down Expand Up @@ -82,16 +120,6 @@ function end_array_node(s::SerializerState)
end
end

# State carried while loading: the table of referenced objects found in the
# file, keyed by their id.
struct DeserializerState
  # or perhaps Dict{Int,Any} to be resilient against corrupt/malicious files using huge ids
  # the values of refs are objects to be deserialized
  refs::Dict{Symbol, Dict}
end

# Create a state with an empty reference table. The dictionary is built with
# the declared field type directly, so no conversion from `Dict{Symbol, Any}`
# is needed.
function DeserializerState()
  return DeserializerState(Dict{Symbol, Dict}())
end

# Hook called when writing is done; there is currently nothing to do.
finish_writing(s::SerializerState) = nothing
Expand Down Expand Up @@ -137,16 +165,3 @@ function save_data_json(s::SerializerState, jsonstr::Any,
write(s.io, jsonstr)
end

# Create a fresh writing state for `io`.
# some level of handling should be done here at a later date
serializer_open(io::IO) = SerializerState(io)

# Finish writing with the state `s`; simply forwards to `finish_writing`.
serializer_close(s::SerializerState) = finish_writing(s)

# Create a fresh reading state. `io` is accepted but not used yet.
# should eventually take io
deserializer_open(io::IO) = DeserializerState()
Loading
Loading