Skip to content

Commit

Permalink
clean up handling of special types with opaque layouts
Browse files Browse the repository at this point in the history
This makes it clearer which values have representations not reflected
in the usual nfields/sizeof model.

Also remove `length` fields from `String` and `SimpleVector`, since
they're misleading and will probably be removed in the future.
  • Loading branch information
JeffBezanson committed Jun 26, 2017
1 parent c2c37ef commit 2b47865
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 68 deletions.
9 changes: 5 additions & 4 deletions base/essentials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,13 @@ function getindex(v::SimpleVector, i::Int)
return unsafe_pointer_to_objref(x)
end

length(v::SimpleVector) = v.length
endof(v::SimpleVector) = v.length
# TODO: add gc use intrinsic call instead of noinline
length(v::SimpleVector) = (@_noinline_meta; unsafe_load(convert(Ptr{Int},data_pointer_from_objref(v))))
endof(v::SimpleVector) = length(v)
start(v::SimpleVector) = 1
next(v::SimpleVector,i) = (v[i],i+1)
done(v::SimpleVector,i) = (i > v.length)
isempty(v::SimpleVector) = (v.length == 0)
done(v::SimpleVector,i) = (i > length(v))
isempty(v::SimpleVector) = (length(v) == 0)
indices(v::SimpleVector) = (OneTo(length(v)),)
linearindices(v::SimpleVector) = indices(v, 1)
indices(v::SimpleVector, d) = d <= 1 ? indices(v)[d] : OneTo(1)
Expand Down
2 changes: 1 addition & 1 deletion base/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ rand(rng::AbstractRNG, r::AbstractArray, dims::Integer...) = rand(rng, r, conver

isvalid_unsafe(s::String, i) = !Base.is_valid_continuation(unsafe_load(pointer(s), i))
isvalid_unsafe(s::AbstractString, i) = isvalid(s, i)
_endof(s::String) = s.len
_endof(s::String) = sizeof(s)
_endof(s::AbstractString) = endof(s)

function rand(rng::AbstractRNG, s::AbstractString)::Char
Expand Down
50 changes: 26 additions & 24 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,31 +62,33 @@ returns the `i`th byte of the representation of a UTF-8 string.
codeunit(s::AbstractString, i::Integer)

@inline function codeunit(s::String, i::Integer)
@boundscheck if (i < 1) | (i > s.len)
@boundscheck if (i < 1) | (i > sizeof(s))
throw(BoundsError(s,i))
end
unsafe_load(pointer(s),i)
end

write(io::IO, s::String) = unsafe_write(io, pointer(s), reinterpret(UInt, s.len))
write(io::IO, s::String) = unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s)))

## comparison ##

function cmp(a::String, b::String)
al, bl = sizeof(a), sizeof(b)
c = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
a, b, min(a.len,b.len))
return c < 0 ? -1 : c > 0 ? +1 : cmp(a.len,b.len)
a, b, min(al,bl))
return c < 0 ? -1 : c > 0 ? +1 : cmp(al,bl)
end

function ==(a::String, b::String)
a.len == b.len && 0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, a.len)
al = sizeof(a)
al == sizeof(b) && 0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, al)
end

## prevind and nextind ##

function prevind(s::String, i::Integer)
j = Int(i)
e = s.len
e = sizeof(s)
if j > e
return endof(s)
end
Expand All @@ -102,7 +104,7 @@ function nextind(s::String, i::Integer)
if j < 1
return 1
end
e = s.len
e = sizeof(s)
j += 1
@inbounds while j <= e && is_valid_continuation(codeunit(s,j))
j += 1
Expand All @@ -115,7 +117,7 @@ end
byte_string_classify(data::Vector{UInt8}) =
ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), data, length(data))
byte_string_classify(s::String) =
ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, s.len)
ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s))
# 0: neither valid ASCII nor UTF-8
# 1: valid ASCII
# 2: valid UTF-8
Expand Down Expand Up @@ -151,7 +153,7 @@ const utf8_trailing = [

function endof(s::String)
p = pointer(s)
i = s.len
i = sizeof(s)
while i > 0 && is_valid_continuation(unsafe_load(p,i))
i -= 1
end
Expand All @@ -161,7 +163,7 @@ end
function length(s::String)
p = pointer(s)
cnum = 0
for i = 1:s.len
for i = 1:sizeof(s)
cnum += !is_valid_continuation(unsafe_load(p,i))
end
cnum
Expand All @@ -187,21 +189,21 @@ end

# This implementation relies on `next` returning a value past the end of the
# String's underlying data, which is true for valid Strings
done(s::String, state) = state > s.len
done(s::String, state) = state > sizeof(s)

@inline function next(s::String, i::Int)
# function is split into this critical fast-path
# for pure ascii data, such as parsing numbers,
# and a longer function that can handle any utf8 data
@boundscheck if (i < 1) | (i > s.len)
@boundscheck if (i < 1) | (i > sizeof(s))
throw(BoundsError(s,i))
end
p = pointer(s)
b = unsafe_load(p, i)
if b < 0x80
return Char(b), i + 1
end
return slow_utf8_next(p, b, i, s.len)
return slow_utf8_next(p, b, i, sizeof(s))
end

function first_utf8_byte(ch::Char)
Expand All @@ -214,7 +216,7 @@ function first_utf8_byte(ch::Char)
end

function reverseind(s::String, i::Integer)
j = s.len + 1 - i
j = sizeof(s) + 1 - i
p = pointer(s)
while is_valid_continuation(unsafe_load(p,j))
j -= 1
Expand All @@ -225,12 +227,12 @@ end
## overload methods for efficiency ##

isvalid(s::String, i::Integer) =
(1 <= i <= s.len) && !is_valid_continuation(unsafe_load(pointer(s),i))
(1 <= i <= sizeof(s)) && !is_valid_continuation(unsafe_load(pointer(s),i))

function getindex(s::String, r::UnitRange{Int})
isempty(r) && return ""
i, j = first(r), last(r)
l = s.len
l = sizeof(s)
if i < 1 || i > l
throw(BoundsError(s, i))
end
Expand Down Expand Up @@ -282,7 +284,7 @@ function search(a::ByteArray, b::Char, i::Integer = 1)
end
end

function rsearch(s::String, c::Char, i::Integer = s.len)
function rsearch(s::String, c::Char, i::Integer = sizeof(s))
c < Char(0x80) && return rsearch(s, c%UInt8, i)
b = first_utf8_byte(c)
while true
Expand All @@ -292,7 +294,7 @@ function rsearch(s::String, c::Char, i::Integer = s.len)
end
end

function rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = s.len)
function rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(s))
if i < 1
return i == 0 ? 0 : throw(BoundsError(a, i))
end
Expand Down Expand Up @@ -321,13 +323,13 @@ function string(a::String...)
end
n = 0
for str in a
n += str.len
n += sizeof(str)
end
out = _string_n(n)
offs = 1
for str in a
unsafe_copy!(pointer(out,offs), pointer(str), str.len)
offs += str.len
unsafe_copy!(pointer(out,offs), pointer(str), sizeof(str))
offs += sizeof(str)
end
return out
end
Expand All @@ -353,7 +355,7 @@ function string(a::Union{String,Char}...)
if isa(d,Char)
n += codelen(d::Char)
else
n += (d::String).len
n += sizeof(d::String)
end
end
out = _string_n(n)
Expand Down Expand Up @@ -383,7 +385,7 @@ function string(a::Union{String,Char}...)
unsafe_store!(p, 0xbd, offs); offs += 1
end
else
l = (d::String).len
l = sizeof(d::String)
unsafe_copy!(pointer(out,offs), pointer(d::String), l)
offs += l
end
Expand Down Expand Up @@ -425,7 +427,7 @@ end

function repeat(s::String, r::Integer)
r < 0 && throw(ArgumentError("can't repeat a string $r times"))
n = s.len
n = sizeof(s)
out = _string_n(n*r)
if n == 1 # common case: repeating a single ASCII char
ccall(:memset, Ptr{Void}, (Ptr{UInt8}, Cint, Csize_t), out, unsafe_load(pointer(s)), r)
Expand Down
4 changes: 2 additions & 2 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ end
endswith(str::AbstractString, chars::Chars) = !isempty(str) && last(str) in chars

startswith(a::String, b::String) =
(a.len >= b.len && ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, b.len) == 0)
(sizeof(a) >= sizeof(b) && ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, sizeof(b)) == 0)
startswith(a::Vector{UInt8}, b::Vector{UInt8}) =
(length(a) >= length(b) && ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, length(b)) == 0)

Expand Down Expand Up @@ -109,7 +109,7 @@ end
# NOTE: use with caution -- breaks the immutable string convention!
# TODO: this is hard to provide with the new representation
#function chomp!(s::String)
# if !isempty(s) && codeunit(s,s.len) == 0x0a
# if !isempty(s) && codeunit(s,sizeof(s)) == 0x0a
# n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2
# ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n)
# end
Expand Down
3 changes: 1 addition & 2 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,7 @@ JL_CALLABLE(jl_f_sizeof)
}
if (jl_is_datatype(x)) {
jl_datatype_t *dx = (jl_datatype_t*)x;
if (dx->name == jl_array_typename || dx == jl_symbol_type || dx == jl_simplevector_type ||
dx == jl_string_type)
if (dx->layout && jl_is_layout_opaque(dx->layout))
jl_error("type does not have a canonical binary representation");
if (!(dx->name->names == jl_emptysvec && jl_datatype_size(dx) > 0)) {
// names===() and size > 0 => bitstype, size always known
Expand Down
19 changes: 15 additions & 4 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,10 +238,21 @@ void jl_compute_field_offsets(jl_datatype_t *st)
if (st->types == NULL)
return;
uint32_t nfields = jl_svec_len(st->types);
if (nfields == 0 && st != jl_sym_type && st != jl_simplevector_type) {
// reuse the same layout for all singletons
static const jl_datatype_layout_t singleton_layout = {0, 1, 0, 0, 0};
st->layout = &singleton_layout;
if (nfields == 0) {
if (st == jl_sym_type || st == jl_string_type) {
// opaque layout - heap-allocated blob
static const jl_datatype_layout_t opaque_byte_layout = {0, 1, 0, 1, 0};
st->layout = &opaque_byte_layout;
}
else if (st == jl_simplevector_type || st->name == jl_array_typename) {
static const jl_datatype_layout_t opaque_ptr_layout = {0, sizeof(void*), 0, 1, 0};
st->layout = &opaque_ptr_layout;
}
else {
// reuse the same layout for all singletons
static const jl_datatype_layout_t singleton_layout = {0, 1, 0, 0, 0};
st->layout = &singleton_layout;
}
return;
}
if (!jl_is_leaf_type((jl_value_t*)st)) {
Expand Down
18 changes: 6 additions & 12 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1665,15 +1665,15 @@ void jl_init_types(void)
jl_simplevector_type->name->mt = jl_new_method_table(jl_simplevector_type->name->name, core);
jl_simplevector_type->super = jl_any_type;
jl_simplevector_type->parameters = jl_emptysvec;
jl_simplevector_type->name->names = jl_perm_symsvec(1, "length");
jl_simplevector_type->types = jl_svec(1, jl_any_type);
jl_simplevector_type->name->names = jl_emptysvec;
jl_simplevector_type->types = jl_emptysvec;
jl_simplevector_type->uid = jl_assign_type_uid();
jl_simplevector_type->instance = NULL;
jl_simplevector_type->struct_decl = NULL;
jl_simplevector_type->ditype = NULL;
jl_simplevector_type->abstract = 0;
jl_simplevector_type->mutabl = 1;
jl_simplevector_type->ninitialized = 1;
jl_simplevector_type->ninitialized = 0;

// now they can be used to create the remaining base kinds and types
jl_void_type = jl_new_datatype(jl_symbol("Void"), core, jl_any_type, jl_emptysvec,
Expand Down Expand Up @@ -1834,8 +1834,7 @@ void jl_init_types(void)
tv,
jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper;
jl_array_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->name;
static const jl_datatype_layout_t _jl_array_layout = { 0, sizeof(void*), 0, 1, 0 };
((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout = &_jl_array_layout;
jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type));

jl_array_any_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_any_type, jl_box_long(1));

Expand Down Expand Up @@ -2028,8 +2027,8 @@ void jl_init_types(void)

jl_abstractstring_type = jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractString"), core, jl_any_type, jl_emptysvec);
jl_string_type = jl_new_datatype(jl_symbol("String"), core, jl_abstractstring_type, jl_emptysvec,
jl_perm_symsvec(1, "len"), jl_svec1(jl_long_type),
0, 1, 1);
jl_emptysvec, jl_emptysvec, 0, 1, 0);
jl_compute_field_offsets(jl_string_type);

// complete builtin type metadata
jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_void_type);
Expand All @@ -2045,7 +2044,6 @@ void jl_init_types(void)
jl_svecset(jl_datatype_type->types, 13, jl_int32_type);
jl_svecset(jl_datatype_type->types, 14, jl_bool_type);
jl_svecset(jl_datatype_type->types, 15, jl_bool_type);
jl_svecset(jl_simplevector_type->types, 0, jl_long_type);
jl_svecset(jl_typename_type->types, 1, jl_module_type);
jl_svecset(jl_typename_type->types, 6, jl_long_type);
jl_svecset(jl_typename_type->types, 3, jl_type_type);
Expand Down Expand Up @@ -2081,10 +2079,6 @@ void jl_init_types(void)
jl_compute_field_offsets(jl_simplevector_type);
jl_compute_field_offsets(jl_sym_type);

// TODO: don't modify layout objects
((jl_datatype_layout_t*)jl_sym_type->layout)->npointers = 1;
((jl_datatype_layout_t*)jl_simplevector_type->layout)->npointers = 1;

jl_cfunction_list.unknown = jl_nothing;
}

Expand Down
5 changes: 5 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,11 @@ static inline uint32_t jl_fielddesc_size(int8_t fielddesc_type)

#undef DEFINE_FIELD_ACCESSORS

static inline int jl_is_layout_opaque(jl_datatype_layout_t *l)
{
return l->nfields == 0 && l->npointers > 0;
}

// basic predicates -----------------------------------------------------------
#define jl_is_nothing(v) (((jl_value_t*)(v)) == ((jl_value_t*)jl_nothing))
#define jl_is_tuple(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_tuple_typename)
Expand Down
24 changes: 13 additions & 11 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,19 @@ static void jl_write_values(jl_serializer_state *s)
else if (jl_typeis(v, jl_task_type)) {
jl_error("Task cannot be serialized");
}
else if (jl_is_svec(v)) {
ios_write(s->s, (char*)v, sizeof(void*));
size_t i, l = jl_svec_len(v);
assert(l > 0);
for (i = 0; i < l; i++) {
write_pointerfield(s, jl_svecref(v, i));
}
}
else if (jl_is_string(v)) {
ios_write(s->s, (char*)v, sizeof(void*));
ios_write(s->s, jl_string_data(v), jl_string_len(v));
write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility
}
else if (jl_datatype_nfields(t) == 0) {
assert(t->layout->npointers == 0);
if (t->size > 0)
Expand Down Expand Up @@ -708,17 +721,6 @@ static void jl_write_values(jl_serializer_state *s)
newm->functionObjectsDecls.functionObject = NULL;
newm->functionObjectsDecls.specFunctionObject = NULL;
}
else if (jl_is_svec(v)) {
size_t i, l = jl_svec_len(v);
assert(l > 0);
for (i = 0; i < l; i++) {
write_pointerfield(s, jl_svecref(v, i));
}
}
else if (jl_is_string(v)) {
ios_write(s->s, jl_string_data(v), jl_string_len(v));
write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility
}
else if (jl_is_datatype(v)) {
jl_datatype_t *dt = (jl_datatype_t*)v;
jl_datatype_t *newdt = (jl_datatype_t*)&s->s->buf[reloc_offset];
Expand Down
3 changes: 1 addition & 2 deletions test/inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -898,8 +898,7 @@ end
@test isdefined_tfunc(Const(Base), Const(:length)) === Const(true)
@test isdefined_tfunc(Const(Base), Symbol) == Bool
@test isdefined_tfunc(Const(Base), Const(:NotCurrentlyDefinedButWhoKnows)) == Bool
@test isdefined_tfunc(SimpleVector, Const(1)) === Const(true)
@test isdefined_tfunc(SimpleVector, Const(:length)) === Const(true)
@test isdefined_tfunc(SimpleVector, Const(1)) === Const(false)
@test Const(false) isdefined_tfunc(Const(:x), Symbol)
@test Const(false) isdefined_tfunc(Const(:x), Const(:y))
@test isdefined_tfunc(Vector{Int}, Const(1)) == Bool
Expand Down
Loading

0 comments on commit 2b47865

Please sign in to comment.