Skip to content

Commit

Permalink
Connect our serialization with Julia serialization (#2837)
Browse files Browse the repository at this point in the history
  • Loading branch information
antonydellavecchia authored Oct 2, 2023
1 parent 141adbf commit aa9dbc2
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 109 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version = "0.13.1-DEV"
[deps]
AbstractAlgebra = "c3fe647b-3220-5bb0-a1ea-a7954cac585d"
AlgebraicSolving = "66b61cbe-0446-4d5d-9090-1ff510639f9d"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
GAP = "c863536a-3901-11e9-33e7-d5cd0df7b904"
Hecke = "3e1990a7-5d81-5526-99ce-9ba3ff248f21"
Expand All @@ -18,6 +19,7 @@ Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RandomExtensions = "fb686558-2515-59ef-acaa-46db3789a887"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Singular = "bcd08a7b-43d2-5ff7-b6d4-c458787f915c"
TOPCOM_jll = "36f60fef-b880-50dc-9289-4aaecee93cc3"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
Expand Down
197 changes: 111 additions & 86 deletions src/Serialization/main.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,70 +162,6 @@ const oscar_serialization_version = get_version_info()
# Lookup table from the string encoding of a type to the Julia type it stands for;
# entries are written by `register_serialization_type(T, str)`.
# parameters of type should not matter here
const reverse_type_map = Dict{String, Type}()

# Record `T` as the type that the encoded name `str` decodes to.
# Registering the same (type, name) pair again is harmless; trying to reuse
# `str` for a different type raises an error.
function register_serialization_type(@nospecialize(T::Type), str::String)
  known = get(reverse_type_map, str, T)
  known == T || error("encoded type $str already registered for a different type: $T versus $(known)")
  return reverse_type_map[str] = T
end

# @register_serialization_type NewType "String Representation of type" uses_id uses_params

# register_serialization_type is a macro to ensure that the string we generate
# matches exactly the expression passed as first argument, and does not change
# in unexpected ways when import/export statements are adjusted.
# The last three arguments are optional and can arise in any order. Passing a string
# argument will override how the type is stored as a string. The last two are boolean
# flags. When setting uses_id the object will be stored as a reference and will be
# referred to throughout the serialization using a UUID. This should typically only
# be used for types that do not have a fixed normal form, for example PolyRing and MPolyRing.
# Using the uses_params flag will serialize the object with a more structured type
# description, which will make the serialization more efficient; see the discussion on
# save_type_params / load_type_params below.
# Build the code that `@register_serialization_type` expands to for the type
# expression `ex`. The returned expression is escaped as a whole, so every name
# in it is resolved in the module where the macro is invoked.
#
# - `str`:         string under which the type is encoded
# - `uses_id`:     value returned by the generated `serialize_with_id` methods
# - `uses_params`: value returned by the generated `serialize_with_params` method
function register_serialization_type(ex::Any, str::String, uses_id::Bool, uses_params::Bool)
  generated = quote
    register_serialization_type($ex, $str)

    function encode_type(::Type{<:$ex})
      return $str
    end

    # For some types equality cannot be decided from the serialized data alone;
    # such types carry an id so that identity can be re-established on load.
    # Ids are only needed for parent types: once parents are known to be equal,
    # equality of element types can be checked directly.
    # Example: two serializations of QQ[x] need ids. The rings are isomorphic but
    # may have to be kept apart, and other software might not store the variable
    # symbols, so QQ[x] and QQ[y] could not otherwise be told apart. In general,
    # ids are used whenever some software system might be unable to distinguish
    # the objects on a syntactic level.
    # Types like ZZ, QQ, and ZZ/nZZ need no ids since their encodings are
    # syntactically unambiguous.
    function serialize_with_id(obj::T) where T <: $ex
      return $uses_id
    end
    function serialize_with_id(T::Type{<:$ex})
      return $uses_id
    end
    function serialize_with_params(T::Type{<:$ex})
      return $uses_params
    end
  end
  return esc(generated)
end

# Usage: @register_serialization_type NewType "String Representation of type" uses_id uses_params
#
# `ex` is the type expression to register. The remaining arguments are optional
# and may appear in any order:
#   - a string literal overrides the encoded name (default: `string(ex)`),
#   - `uses_id` turns on serialization by reference,
#   - `uses_params` turns on the structured type description.
# Any other argument is rejected at macro-expansion time; previously such
# arguments (e.g. a misspelled flag) were dropped without notice, which silently
# changed how the type was serialized.
macro register_serialization_type(ex::Any, args...)
  uses_id = false
  uses_params = false
  str = nothing
  for el in args
    if el isa String
      str = el
    elseif el === :uses_id
      uses_id = true
    elseif el === :uses_params
      uses_params = true
    else
      throw(ArgumentError(
        "unsupported argument `$el` for @register_serialization_type; " *
        "expected a string literal, `uses_id` or `uses_params`"))
    end
  end
  if str === nothing
    # default encoding is the type expression exactly as written by the caller
    str = string(ex)
  end

  return register_serialization_type(ex, str, uses_id, uses_params)
end

# Fallback for types without a registered encoding: always raises an error.
encode_type(::Type{T}) where T = error("unsupported type '$T' for encoding")
Expand Down Expand Up @@ -390,6 +326,90 @@ function load_parents(s::DeserializerState, parent_ids::Vector)
return loaded_parents
end

################################################################################
# Type Registration
# Record `T` as the type that the encoded name `str` decodes to.
# Registering the same (type, name) pair again is harmless; trying to reuse
# `str` for a different type raises an error.
function register_serialization_type(@nospecialize(T::Type), str::String)
  known = get(reverse_type_map, str, T)
  known == T || error("encoded type $str already registered for a different type: $T versus $(known)")
  return reverse_type_map[str] = T
end

# @register_serialization_type NewType "String Representation of type" uses_id uses_params

# register_serialization_type is a macro to ensure that the string we generate
# matches exactly the expression passed as first argument, and does not change
# in unexpected ways when import/export statements are adjusted.
# The last three arguments are optional and can arise in any order. Passing a string
# argument will override how the type is stored as a string. The last two are boolean
# flags. When setting uses_id the object will be stored as a reference and will be
# referred to throughout the serialization using a UUID. This should typically only
# be used for types that do not have a fixed normal form, for example PolyRing and MPolyRing.
# Using the uses_params flag will serialize the object with a more structured type
# description, which will make the serialization more efficient; see the discussion on
# save_type_params / load_type_params below.

import Serialization.serialize
import Serialization.deserialize
import Serialization.serialize_type
import Distributed.AbstractSerializer

# Build the code that `@register_serialization_type` expands to for the type
# expression `ex`. The returned expression is escaped as a whole, so every name
# in it is resolved in the module where the macro is invoked.
#
# - `str`:         string under which the type is encoded
# - `uses_id`:     value returned by the generated `serialize_with_id` methods
# - `uses_params`: value returned by the generated `serialize_with_params` method
function register_serialization_type(ex::Any, str::String, uses_id::Bool, uses_params::Bool)
  generated = quote
    register_serialization_type($ex, $str)

    function encode_type(::Type{<:$ex})
      return $str
    end

    # For some types equality cannot be decided from the serialized data alone;
    # such types carry an id so that identity can be re-established on load.
    # Ids are only needed for parent types: once parents are known to be equal,
    # equality of element types can be checked directly.
    # Example: two serializations of QQ[x] need ids. The rings are isomorphic but
    # may have to be kept apart, and other software might not store the variable
    # symbols, so QQ[x] and QQ[y] could not otherwise be told apart. In general,
    # ids are used whenever some software system might be unable to distinguish
    # the objects on a syntactic level.
    # Types like ZZ, QQ, and ZZ/nZZ need no ids since their encodings are
    # syntactically unambiguous.
    function serialize_with_id(obj::T) where T <: $ex
      return $uses_id
    end
    function serialize_with_id(T::Type{<:$ex})
      return $uses_id
    end
    function serialize_with_params(T::Type{<:$ex})
      return $uses_params
    end

    # Hook into Julia's own serialization, but only for types that are not
    # standard Julia types.
    if !($ex <: Union{Number, String, Bool, Symbol, Vector, Tuple, Matrix, NamedTuple})
      function serialize(s::AbstractSerializer, obj::T) where T <: $ex
        serialize_type(s, T)
        save(s.io, obj; serializer_type=IPCSerializer)
      end
      function deserialize(s::AbstractSerializer, ::Type{<:$ex})
        load(s.io; serializer_type=IPCSerializer)
      end
    end
  end
  return esc(generated)
end

# Usage: @register_serialization_type NewType "String Representation of type" uses_id uses_params
#
# `ex` is the type expression to register. The remaining arguments are optional
# and may appear in any order:
#   - a string literal overrides the encoded name (default: `string(ex)`),
#   - `uses_id` turns on serialization by reference,
#   - `uses_params` turns on the structured type description.
# Any other argument is rejected at macro-expansion time; previously such
# arguments (e.g. a misspelled flag) were dropped without notice, which silently
# changed how the type was serialized.
macro register_serialization_type(ex::Any, args...)
  uses_id = false
  uses_params = false
  str = nothing
  for el in args
    if el isa String
      str = el
    elseif el === :uses_id
      uses_id = true
    elseif el === :uses_params
      uses_params = true
    else
      throw(ArgumentError(
        "unsupported argument `$el` for @register_serialization_type; " *
        "expected a string literal, `uses_id` or `uses_params`"))
    end
  end
  if str === nothing
    # default encoding is the type expression exactly as written by the caller
    str = string(ex)
  end

  return register_serialization_type(ex, str, uses_id, uses_params)
end

################################################################################
# Include serialization implementations for various types

Expand Down Expand Up @@ -449,39 +469,43 @@ julia> load("/tmp/fourtitwo.json")
42
```
"""
function save(io::IO, obj::T; metadata::Union{MetaData, Nothing}=nothing) where T
state = serializer_open(io)
save_data_dict(state) do
function save(io::IO, obj::T; metadata::Union{MetaData, Nothing}=nothing,
serializer_type::Type{<: OscarSerializer} = JSONSerializer) where T
s = state(serializer_open(io, serializer_type))
save_data_dict(s) do
# write out the namespace first
save_header(state, oscar_serialization_version, :_ns)
save_header(s, oscar_serialization_version, :_ns)

save_typed_object(state, obj)
save_typed_object(s, obj)

if serialize_with_id(T)
ref = get(global_serializer_state.obj_to_id, obj, nothing)
if isnothing(ref)
ref = global_serializer_state.obj_to_id[obj] = uuid4()
global_serializer_state.id_to_obj[ref] = obj
end
save_object(state, string(ref), :id)
save_object(s, string(ref), :id)

end

# this should be handled by serializers in a later commit / PR
!isempty(state.refs) && save_data_dict(state, refs_key) do
for id in state.refs
ref_obj = global_serializer_state.id_to_obj[id]
state.key = Symbol(id)
save_data_dict(state) do
save_typed_object(state, ref_obj)
if !isempty(s.refs) && serializer_type == JSONSerializer
save_data_dict(s, refs_key) do
for id in s.refs
ref_obj = global_serializer_state.id_to_obj[id]
s.key = Symbol(id)
save_data_dict(s) do
save_typed_object(s, ref_obj)
end
end
end
end

if !isnothing(metadata)
save_json(state, json(metadata), :meta)
save_json(s, json(metadata), :meta)
end
end
serializer_close(state)
serializer_close(s)
return nothing
end

Expand Down Expand Up @@ -550,8 +574,9 @@ julia> parent(loaded_p_v[1]) === parent(loaded_p_v[2]) === R
true
```
"""
function load(io::IO; params::Any = nothing, type::Any = nothing)
state = deserializer_open(io)
function load(io::IO; params::Any = nothing, type::Any = nothing,
serializer_type=JSONSerializer)
s = state(deserializer_open(io, serializer_type))

# this should be moved to the serializer at some point
jsondict = JSON.parse(io, dicttype=Dict{Symbol, Any})
Expand Down Expand Up @@ -581,7 +606,7 @@ function load(io::IO; params::Any = nothing, type::Any = nothing)

# add refs to state for referencing during recursion
if haskey(jsondict, refs_key)
merge!(state.refs, jsondict[refs_key])
merge!(s.refs, jsondict[refs_key])
end

if type !== nothing
Expand All @@ -596,22 +621,22 @@ function load(io::IO; params::Any = nothing, type::Any = nothing)

if serialize_with_params(type)
if isnothing(params)
params = load_type_params(state, type, jsondict[type_key][:params])
params = load_type_params(s, type, jsondict[type_key][:params])
end
loaded = load_object(state, type, jsondict[:data], params)

loaded = load_object(s, type, jsondict[:data], params)
else
Base.issingletontype(type) && return type()
loaded = load_object(state, type, jsondict[:data])
loaded = load_object(s, type, jsondict[:data])
end
else
loaded = load_typed_object(state, jsondict; override_params=params)
loaded = load_typed_object(s, jsondict; override_params=params)
end

if haskey(jsondict, :id)
global_serializer_state.obj_to_id[loaded] = UUID(jsondict[:id])
global_serializer_state.id_to_obj[UUID(jsondict[:id])] = loaded
end

return loaded
end

Expand Down
61 changes: 38 additions & 23 deletions src/Serialization/serializers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,44 @@ function SerializerState(io::IO)
return SerializerState(true, UUID[], io, nothing)
end

# State that is threaded through the loading code.
struct DeserializerState
  # Maps a reference key to the still-serialized data of the referenced object.
  # A differently keyed table (e.g. Dict{Int,Any}) might be more resilient
  # against corrupt or malicious files using huge ids.
  refs::Dict{Symbol, Dict}
end

# Create a state with an empty reference table. The dictionary is built with
# the exact field type so that no implicit conversion (and second allocation)
# happens in the default constructor.
function DeserializerState()
  return DeserializerState(Dict{Symbol, Dict}())
end

################################################################################
# Serializers
# Common supertype of all serializers. Each concrete serializer below wraps
# either a SerializerState (writing) or a DeserializerState (reading).
abstract type OscarSerializer end

struct JSONSerializer <: OscarSerializer
  state::Union{SerializerState, DeserializerState}
end

struct IPCSerializer <: OscarSerializer
  state::Union{SerializerState, DeserializerState}
end

# Return the (de)serializer state wrapped by `s`.
function state(s::OscarSerializer)
  return s.state
end

# Wrap a fresh SerializerState for `io` in a serializer of type `T`.
# some level of handling should be done here at a later date
serializer_open(io::IO, T::Type{<: OscarSerializer}) = T(SerializerState(io))

# Finish serialization by delegating to `finish_writing`.
serializer_close(s::SerializerState) = finish_writing(s)

# Wrap a fresh DeserializerState in a serializer of type `T`.
# `io` is currently unused; the state should eventually take it.
deserializer_open(io::IO, T::Type{<: OscarSerializer}) = T(DeserializerState())

function serialize_dict(f::Function, s::SerializerState)
begin_dict_node(s)
f()
Expand Down Expand Up @@ -82,16 +120,6 @@ function end_array_node(s::SerializerState)
end
end

# State that is threaded through the loading code.
struct DeserializerState
  # Maps a reference key to the still-serialized data of the referenced object.
  # A differently keyed table (e.g. Dict{Int,Any}) might be more resilient
  # against corrupt or malicious files using huge ids.
  refs::Dict{Symbol, Dict}
end

# Create a state with an empty reference table. The dictionary is built with
# the exact field type so that no implicit conversion (and second allocation)
# happens in the default constructor.
function DeserializerState()
  return DeserializerState(Dict{Symbol, Dict}())
end

# Hook invoked when serialization is finished; nothing to do here.
finish_writing(s::SerializerState) = nothing
Expand Down Expand Up @@ -137,16 +165,3 @@ function save_data_json(s::SerializerState, jsonstr::Any,
write(s.io, jsonstr)
end

# Create the SerializerState that writes to `io`.
# some level of handling should be done here at a later date
serializer_open(io::IO) = SerializerState(io)

# Finish serialization by delegating to `finish_writing`.
serializer_close(s::SerializerState) = finish_writing(s)

# Create a fresh DeserializerState.
# `io` is currently unused; the state should eventually take it.
deserializer_open(io::IO) = DeserializerState()
Loading

0 comments on commit aa9dbc2

Please sign in to comment.