Skip to content

Commit

Permalink
Rewriting C code for scan_object, get_size, and get_object_start_ref …
Browse files Browse the repository at this point in the history
…in Rust (#82)

This PR rewrites the code for scanning Julia objects in Rust and
removes the C variant. It does the same for the functions
`get_so_object_size` and `get_object_start_ref`.
It also updates the types in `mmtk_julia_types.h`, from which we
generate a new version of `julia_types.rs` using Bindgen. Since Bindgen
generates inefficient code for accessing bitfields, I've also added
"custom" functions to access those fields (only the ones used in
`scan_object`, `get_so_object_size` and `get_object_start_ref`).

NB: needs to be merged with mmtk/julia#25
  • Loading branch information
udesou authored Jul 27, 2023
1 parent 6ec08d5 commit db49af4
Show file tree
Hide file tree
Showing 13 changed files with 2,048 additions and 865 deletions.
385 changes: 4 additions & 381 deletions julia/mmtk_julia.c

Large diffs are not rendered by default.

207 changes: 169 additions & 38 deletions julia/mmtk_julia_types.h
Original file line number Diff line number Diff line change
@@ -1,33 +1,19 @@
// To regenerate the Rust bindings, install bindgen with `cargo install bindgen-cli`
// and then run, from the root of the mmtk-julia checkout:
//   ~/.cargo/bin/bindgen julia/mmtk_julia_types.h -o mmtk/src/julia_types.rs
#include <setjmp.h>

typedef signed char __int8_t;
typedef unsigned char __uint8_t;
typedef signed short int __int16_t;
typedef unsigned short int __uint16_t;
typedef signed int __int32_t;
typedef unsigned int __uint32_t;
typedef signed long int __int64_t;
typedef unsigned long int __uint64_t;
#include <stdint.h>

typedef __SIZE_TYPE__ size_t;

typedef __int8_t int8_t;
typedef __int16_t int16_t;
typedef __int32_t int32_t;
typedef __int64_t int64_t;
typedef __uint8_t uint8_t;
typedef __uint16_t uint16_t;
typedef __uint32_t uint32_t;
typedef __uint64_t uint64_t;

/* Types for `void *' pointers. */
typedef int intptr_t;
typedef unsigned long int uintptr_t;

// Bitfields declared on uintptr_t. The widths add up to 64 bits when _P64 is
// defined (2 + 1 + 1 + 60) and to 32 bits otherwise (2 + 1 + 1 + 28).
// NOTE(review): presumably mirrors Julia's own tagged-value bit layout, so the
// field order and widths should only change together with it — confirm against
// the Julia revision this header targets.
struct mmtk__jl_taggedvalue_bits {
uintptr_t gc:2;
uintptr_t in_image:1;
uintptr_t unused:1;
#ifdef _P64
uintptr_t tag:60; // the remaining 60 of 64 bits
#else
uintptr_t tag:28; // the remaining 28 of 32 bits
#endif
};

typedef struct mmtk__jl_value_t mmtk_jl_value_t;
Expand Down Expand Up @@ -61,12 +47,14 @@ typedef struct {
} mmtk_jl_array_flags_t;

typedef struct {
uint32_t size;
uint32_t nfields;
uint32_t npointers; // number of pointers embedded inside
int32_t first_ptr; // index of the first pointer (or -1)
uint16_t alignment; // strictest alignment over all fields
uint16_t haspadding : 1; // has internal undefined bytes
uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type
uint16_t padding : 13;
// union {
// jl_fielddesc8_t field8[nfields];
// jl_fielddesc16_t field16[nfields];
Expand All @@ -88,6 +76,7 @@ typedef struct {
// `wrapper` is either the only instantiation of the type (if no parameters)
// or a UnionAll accepting parameters to make an instantiation.
void *wrapper;
void *Typeofwrapper;
void *cache; // sorted array
void *linearcache; // unsorted array
void *mt;
Expand All @@ -98,7 +87,8 @@ typedef struct {
uint8_t abstract:1;
uint8_t mutabl:1;
uint8_t mayinlinealloc:1;
uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation)
uint8_t _reserved:5;
uint8_t max_methods;
} mmtk_jl_typename_t;

typedef struct {
Expand All @@ -114,16 +104,19 @@ typedef struct mmtk__jl_datatype_t {
mmtk_jl_svec_t *types;
mmtk_jl_value_t *instance; // for singletons
const mmtk_jl_datatype_layout_t *layout;
int32_t size; // TODO: move to _jl_datatype_layout_t
// memoized properties
uint32_t hash;
uint8_t hasfreetypevars:1; // majority part of isconcrete computation
uint8_t isconcretetype:1; // whether this type can have instances
uint8_t isdispatchtuple:1; // aka isleaftupletype
uint8_t isbitstype:1; // relevant query for C-api and type-parameters
uint8_t zeroinit:1; // if one or more fields requires zero-initialization
uint8_t has_concrete_subtype:1; // If clear, no value will have this datatype
uint8_t cached_by_hash:1; // stored in hash-based set cache (instead of linear cache)
uint16_t hasfreetypevars:1; // majority part of isconcrete computation
uint16_t isconcretetype:1; // whether this type can have instances
uint16_t isdispatchtuple:1; // aka isleaftupletype
uint16_t isbitstype:1; // relevant query for C-api and type-parameters
uint16_t zeroinit:1; // if one or more fields requires zero-initialization
uint16_t has_concrete_subtype:1; // If clear, no value will have this datatype
uint16_t maybe_subtype_of_cache:1; // Computational bit for has_concrete_supertype. See description in jltypes.c.
uint16_t isprimitivetype:1; // whether this is declared with 'primitive type' keyword (sized, no fields, and immutable)
uint16_t ismutationfree:1; // whether any mutable memory is reachable through this type (in the type or via fields)
uint16_t isidentityfree:1; // whether this type or any object reachable through its fields has non-content-based identity
uint16_t smalltag:6; // whether this type has a small-tag optimization
} mmtk_jl_datatype_t;

typedef struct {
Expand Down Expand Up @@ -153,15 +146,16 @@ typedef struct mmtk__jl_sym_t {

typedef struct {
// not first-class
mmtk_jl_sym_t *name;
_Atomic(void*) value;
_Atomic(void*) globalref; // cached GlobalRef for this binding
struct mmtk__jl_module_t* owner; // for individual imported bindings -- TODO: make _Atomic
void* globalref; // cached GlobalRef for this binding
struct mmtk__jl_binding_t* owner; // for individual imported bindings -- TODO: make _Atomic
_Atomic(void*) ty; // binding type
uint8_t constp:1;
uint8_t exportp:1;
uint8_t imported:1;
uint8_t usingfailed:1;
uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
uint8_t padding:2;
} mmtk_jl_binding_t;

#define HT_N_INLINE 32
Expand Down Expand Up @@ -195,9 +189,10 @@ typedef struct mmtk__jl_module_t {
void *name;
struct mmtk__jl_module_t *parent;
// hidden fields:
mmtk_htable_t bindings;
mmtk_jl_svec_t* bindings;
mmtk_jl_array_t* bindingkeyset; // index lookup by name into bindings
mmtk_arraylist_t usings; // modules with all bindings potentially imported
uint64_t build_id;
mmtk_jl_uuid_t build_id;
mmtk_jl_uuid_t uuid;
size_t primary_world;
_Atomic(uint32_t) counter;
Expand All @@ -208,6 +203,7 @@ typedef struct mmtk__jl_module_t {
uint8_t istopmod;
int8_t max_methods;
mmtk_jl_mutex_t lock;
intptr_t hash;
} mmtk_jl_module_t;

// Exception stack: a stack of pairs of (exception,raw_backtrace).
Expand All @@ -234,7 +230,7 @@ typedef struct mmtk__jl_excstack_t mmtk_jl_excstack_t;
/* Use the same type for `jmp_buf' and `sigjmp_buf'.
The `__mask_was_saved' flag determines whether
or not `longjmp' will restore the signal mask. */
typedef struct __jmp_buf_tag sigjmp_buf[1];
// typedef struct __jmp_buf_tag sigjmp_buf[1];

/* Store the calling environment in ENV, also saving the
signal mask if SAVEMASK is nonzero. Return 0. */
Expand Down Expand Up @@ -281,6 +277,9 @@ typedef struct {
#if defined(_COMPILER_TSAN_ENABLED_)
void *tsan_state;
#endif
#if defined(_COMPILER_ASAN_ENABLED_)
void *asan_fake_stack;
#endif
} mmtk_jl_ucontext_t;

typedef struct mmtk__jl_gcframe_t mmtk_jl_gcframe_t;
Expand All @@ -291,6 +290,8 @@ struct mmtk__jl_gcframe_t {
// actual roots go here
};

#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1

typedef struct mmtk__jl_task_t {
void *next; // invasive linked list for scheduler
void *queue; // invasive linked list for scheduler
Expand All @@ -299,7 +300,7 @@ typedef struct mmtk__jl_task_t {
void *result;
void *logstate;
void *start;
uint64_t rngState[4];
uint64_t rngState[JL_RNG_SIZE];
_Atomic(uint8_t) _state;
uint8_t sticky; // record whether this Task can be migrated to a new thread
_Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
Expand All @@ -311,6 +312,11 @@ typedef struct mmtk__jl_task_t {
_Atomic(int16_t) tid;
// threadpool id
int8_t threadpoolid;
// Reentrancy bits
// Bit 0: 1 if we are currently running inference/codegen
// Bit 1-2: 0-3 counter of how many times we've reentered inference
// Bit 3: 1 if we are writing the image and inference is illegal
uint8_t reentrant_timing;
// saved gc stack top for context switches
mmtk_jl_gcframe_t *gcstack;
size_t world_age;
Expand All @@ -331,3 +337,128 @@ typedef struct mmtk__jl_task_t {
// A struct whose only member is a pointer to a Julia value.
// NOTE(review): the name suggests this mirrors Julia's weak-reference object
// layout — confirm against the Julia revision this header targets.
typedef struct {
mmtk_jl_value_t *value; // sole field
} mmtk_jl_weakref_t;

// the following mirrors `struct EffectsOverride` in `base/compiler/effects.jl`
// The union overlays two members: `overrides`, a struct of seven 1-bit flags
// declared on uint8_t, and `bits`, a plain uint8_t. Seven flag bits are named;
// no eighth flag is declared.
// NOTE(review): which bit of `bits` corresponds to which flag depends on the
// compiler's bitfield allocation order — confirm before masking `bits` directly.
// NOTE(review): the union tag is spelled with three underscores
// (`mmtk___jl_...`) while the typedef name has two (`mmtk__jl_...`); both are
// accepted by the compiler, but confirm the mismatch is intentional before
// relying on the tag name.
typedef union mmtk___jl_purity_overrides_t {
struct {
uint8_t ipo_consistent : 1;
uint8_t ipo_effect_free : 1;
uint8_t ipo_nothrow : 1;
uint8_t ipo_terminates_globally : 1;
// Weaker form of `terminates` that asserts
// that any control flow syntactically in the method
// is guaranteed to terminate, but does not make
// assertions about any called functions.
uint8_t ipo_terminates_locally : 1;
uint8_t ipo_notaskstate : 1;
uint8_t ipo_inaccessiblememonly : 1;
} overrides;
uint8_t bits; // all flags viewed as one byte
} mmtk__jl_purity_overrides_t;

// This type describes a single method definition, and stores data
// shared by the specializations of a function.
//
// In this header every field that refers to another object is declared as
// `void *` or `_Atomic(void*)`, with the exception of `module`, which is typed
// as a pointer to struct mmtk__jl_module_t. `purity` and `writelock` are
// embedded by value.
// NOTE(review): presumably kept field-for-field in sync with Julia's
// jl_method_t so that the bindgen-generated Rust struct has the same layout —
// confirm against the Julia revision this header targets before adding,
// removing or reordering fields.
typedef struct mmtk__jl_method_t {
void *name; // for error reporting
struct mmtk__jl_module_t *module;
void *file;
int32_t line;
size_t primary_world;
size_t deleted_world;

// method's type signature. redundant with TypeMapEntry->specTypes
void *sig;

// table of all jl_method_instance_t specializations we have
_Atomic(void*) specializations; // allocated as [hashable, ..., NULL, linear, ....], or a single item
_Atomic(void*) speckeyset; // index lookup by hash into specializations

void *slot_syms; // compacted list of slot names (String)
void *external_mt; // reference to the method table this method is part of, null if part of the internal table
void *source; // original code template (jl_code_info_t, but may be compressed), null for builtins
_Atomic(void*) unspecialized; // unspecialized executable method instance, or null
void *generator; // executable code-generating function if available
void *roots; // pointers in generated code (shared to reduce memory), or null
// Identify roots by module-of-origin. We only track the module for roots added during incremental compilation.
// May be NULL if no external roots have been added, otherwise it's a Vector{UInt64}
void *root_blocks; // RLE (build_id.lo, offset) pairs (even/odd indexing)
int32_t nroots_sysimg; // # of roots stored in the system image
void *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this

// cache of specializations of this method for invoke(), i.e.
// cases where this method was called even though it was not necessarily
// the most specific for the argument types.
_Atomic(void*) invokes;

// A function that compares two specializations of this method, returning
// `true` if the first signature is to be considered "smaller" than the
// second for purposes of recursion analysis. Set to NULL to use
// the default recursion relation.
void *recursion_relation;

uint32_t nargs;
uint32_t called; // bit flags: whether each of the first 8 arguments is called
uint32_t nospecialize; // bit flags: which arguments should not be specialized
uint32_t nkw; // # of leading arguments that are actually keyword arguments
// of another method.
// various boolean properties
uint8_t isva;
uint8_t is_for_opaque_closure;
// uint8 settings
uint8_t constprop; // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none
uint8_t max_varargs; // 0xFF = use heuristic; otherwise, max # of args to expand
// varargs when specializing.

// Override the conclusions of inter-procedural effect analysis,
// forcing the conclusion to always true.
mmtk__jl_purity_overrides_t purity; // one-byte union of flag bits, embedded by value

// hidden fields:
// lock for modifications to the method
mmtk_jl_mutex_t writelock;
} mmtk_jl_method_t;

// X-macro list of the "small typeof" names. Expanding JL_SMALL_TYPEOF(XX)
// invokes XX(name) once for every uncommented entry below, in the order written.
// Entries wrapped in /* ... */ (float16/float32/float64 and the AST objects)
// expand to nothing.
// The order is significant to any user that derives numbering from it, such as
// the tag enum that follows in this header, so do not reorder entries casually.
#define JL_SMALL_TYPEOF(XX) \
/* kinds */ \
XX(typeofbottom) \
XX(datatype) \
XX(unionall) \
XX(uniontype) \
/* type parameter objects */ \
XX(vararg) \
XX(tvar) \
XX(symbol) \
XX(module) \
/* special GC objects */ \
XX(simplevector) \
XX(string) \
XX(task) \
/* bits types with special allocators */ \
XX(bool) \
XX(char) \
/*XX(float16)*/ \
/*XX(float32)*/ \
/*XX(float64)*/ \
XX(int16) \
XX(int32) \
XX(int64) \
XX(int8) \
XX(uint16) \
XX(uint32) \
XX(uint64) \
XX(uint8) \
/* AST objects */ \
/* XX(argument) */ \
/* XX(newvarnode) */ \
/* XX(slotnumber) */ \
/* XX(ssavalue) */ \
/* end of JL_SMALL_TYPEOF */
// Small-tag enumerators generated from JL_SMALL_TYPEOF.
// mmtk_jl_null_tag is 0; each XX(name) entry then becomes mmtk_jl_<name>_tag
// with consecutive values starting at 1, in list order. mmtk_jl_tags_count
// follows the last generated entry, so it equals the number of list entries
// plus one.
// NOTE(review): mmtk_jl_max_tags = 64 looks related to the 6-bit `smalltag`
// field of mmtk_jl_datatype_t (2^6 = 64) — confirm.
enum mmtk_jlsmall_typeof_tags {
mmtk_jl_null_tag = 0,
// XX is defined only for the duration of the expansion below.
#define XX(name) mmtk_jl_##name##_tag,
JL_SMALL_TYPEOF(XX)
#undef XX
mmtk_jl_tags_count,
mmtk_jl_bitstags_first = mmtk_jl_char_tag, // n.b. bool is not considered a bitstype, since it can be compared by pointer
mmtk_jl_max_tags = 64
};
8 changes: 3 additions & 5 deletions mmtk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ edition = "2018"
[package.metadata.julia]
# Our CI matches the following line and extract mmtk/julia. If this line is updated, please check ci yaml files and make sure it works.
julia_repo = "https://github.com/mmtk/julia.git"
julia_version = "73411572e118d7cfd0110da46663b34cb82eb520"
julia_version = "5733c287d803316e6da1771db35e49c767804112"

[lib]
crate-type = ["cdylib"]
Expand Down Expand Up @@ -42,7 +42,7 @@ chrono = "*"
# ykstackmaps = { git = "https://github.com/udesou/ykstackmaps.git", branch = "udesou-master", version = "*" }

[features]
default = ["scan_obj_c", "mmtk/vm_space"]
default = ["mmtk/vm_space", "julia_copy_stack"]

# Plans
nogc = []
Expand All @@ -52,6 +52,4 @@ marksweep = []

# TODO remove this when we properly support moving
non_moving_immix = ["mmtk/immix_non_moving", "mmtk/immix_smaller_block"]

# FIXME update and use rust object scanner as default for immix
scan_obj_c = []
julia_copy_stack = []
Loading

0 comments on commit db49af4

Please sign in to comment.