Skip to content

Commit

Permalink
df.col .= x is in-place and df.col = scalar is allowed
Browse files Browse the repository at this point in the history
  • Loading branch information
gustafsson committed Oct 25, 2022
1 parent fdfa2f7 commit 2350dd7
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 157 deletions.
44 changes: 32 additions & 12 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -632,11 +632,11 @@ Base.getindex(df::DataFrame, row_ind::typeof(!), col_inds::MultiColumnIndex) =
##############################################################################

# Will automatically add a new column if needed
function insert_single_column!(df::DataFrame, v::AbstractVector, col_ind::ColumnIndex)
if ncol(df) != 0 && nrow(df) != length(v)
function insert_single_column!(df::DataFrame, v::Any, col_ind::ColumnIndex; copycols = false)
dv = _preprocess_column(v, nrow(df), copycols)
if ncol(df) != 0 && nrow(df) != length(dv)
throw(ArgumentError("New columns must have the same length as old columns"))
end
dv = isa(v, AbstractRange) ? collect(v) : v
firstindex(dv) != 1 && _onebased_check_error()

if haskey(index(df), col_ind)
Expand Down Expand Up @@ -664,24 +664,22 @@ function insert_single_entry!(df::DataFrame, v::Any, row_ind::Integer, col_ind::
end
end

# df[!, SingleColumnIndex] = AbstractVector
function Base.setindex!(df::DataFrame, v::AbstractVector, ::typeof(!), col_ind::ColumnIndex)
# df[!, SingleColumnIndex] = value
# Whole-column assignment with `!` and a single column index. All of the work is handed
# to `insert_single_column!`; the data frame itself is what this method returns.
Base.setindex!(df::DataFrame, v::Any, ::typeof(!), col_ind::ColumnIndex) =
    (insert_single_column!(df, v, col_ind); df)

# df.col = AbstractVector
# df.col = value
# separate methods are needed due to dispatch ambiguity
# `df.col = v` with a vector `v` and a `Symbol` column name: forwarded to
# `df[!, col_ind] = v`; the assigned value `v` is returned.
function Base.setproperty!(df::DataFrame, col_ind::Symbol, v::AbstractVector)
    df[!, col_ind] = v
    return v
end

# Same forwarding for a column name given as a string.
function Base.setproperty!(df::DataFrame, col_ind::AbstractString, v::AbstractVector)
    df[!, col_ind] = v
    return v
end
# Catch-all for `df.col = v` with a `Symbol` column name: whatever `v` is, this method
# only throws an `ArgumentError` that points the user to `df[!, col_ind] .= v`.
# NOTE(review): a `setproperty!` method with the same argument types is defined again
# right after this pair and would replace it if both are loaded. This looks like the
# pre-change side of a diff listing; confirm which definition is meant to be live.
Base.setproperty!(::DataFrame, col_ind::Symbol, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))
# Same catch-all for a column name given as a string; identical error message.
Base.setproperty!(::DataFrame, col_ind::AbstractString, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))
# `df.col = v` for an arbitrary value `v` and a `Symbol` column name: forwarded to
# `df[!, col_ind] = v`; the assigned value `v` is returned.
function Base.setproperty!(df::DataFrame, col_ind::Symbol, v::Any)
    df[!, col_ind] = v
    return v
end

# Same forwarding for a column name given as a string.
function Base.setproperty!(df::DataFrame, col_ind::AbstractString, v::Any)
    df[!, col_ind] = v
    return v
end

# df[SingleRowIndex, SingleColumnIndex] = Single Item
function Base.setindex!(df::DataFrame, v::Any, row_ind::Integer, col_ind::ColumnIndex)
Expand Down Expand Up @@ -786,6 +784,28 @@ for T1 in (:AbstractVector, :Not, :Colon, :(typeof(!))),
end
end

# df[!, MultiColumnIndex] = value
# One pair of methods is generated for every column-selector type listed in
# MULTICOLUMNINDEX_TUPLE; the row selector is always `!`.
for ColsT in MULTICOLUMNINDEX_TUPLE
    @eval begin
        # A vector on the right-hand side is rejected outright with an ArgumentError.
        function Base.setindex!(df::DataFrame, v::AbstractVector,
                                row_inds::typeof(!), col_inds::$ColsT)
            throw(ArgumentError("a vector can not be assigned to multiple rows and columns, consider reshaping to a matrix first"))
        end

        # Any other value is assigned to each selected column in turn through the
        # single-column `df[!, col] = v` path; the data frame is returned.
        function Base.setindex!(df::DataFrame, v::Any,
                                row_inds::typeof(!), col_inds::$ColsT)
            for col in index(df)[col_inds]
                # this will drop metadata appropriately
                df[row_inds, col] = v
            end
            return df
        end
    end
end

"""
copy(df::DataFrame; copycols::Bool=true)
Expand Down
12 changes: 12 additions & 0 deletions src/other/broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ function Base.maybeview(df::AbstractDataFrame, rows, cols)
return view(df, rows, cols)
end

# df[:, cols] .= ...
function Base.dotview(df::AbstractDataFrame, ::Colon, cols::ColumnIndex)
if haskey(index(df), cols)
_drop_all_nonnote_metadata!(parent(df))
Expand All @@ -168,10 +169,15 @@ function Base.dotview(df::AbstractDataFrame, ::Colon, cols::ColumnIndex)
return LazyNewColDataFrame(df, Symbol(cols))
end

# df[!, cols] .= ...
function Base.dotview(df::AbstractDataFrame, ::typeof(!), cols)
if !(cols isa ColumnIndex)
return ColReplaceDataFrame(df, convert(Vector{Int}, index(df)[cols]))
end
if haskey(index(df), cols)
_drop_all_nonnote_metadata!(parent(df))
return view(df, :, cols)
end
if cols isa SymbolOrString
if columnindex(df, cols) == 0 && !is_column_insertion_allowed(df)
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
Expand All @@ -184,7 +190,13 @@ function Base.dotview(df::AbstractDataFrame, ::typeof(!), cols)
end

if isdefined(Base, :dotgetproperty) # Introduced in Julia 1.7
# df.col .= ...
function Base.dotgetproperty(df::AbstractDataFrame, col::SymbolOrString)
if haskey(index(df), col)
_drop_all_nonnote_metadata!(parent(df))
return df[!, col]
end

if columnindex(df, col) == 0 && !is_column_insertion_allowed(df)
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
Expand Down
16 changes: 8 additions & 8 deletions src/subdataframe/subdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ end
# and then define methods for them)
# consider merging SubDataFrame and DataFrame setindex! methods

function Base.setindex!(sdf::SubDataFrame, v::AbstractVector,
function Base.setindex!(sdf::SubDataFrame, v::Any,
::typeof(!), col_ind::ColumnIndex)
if col_ind isa Union{Signed, Unsigned} && !(1 <= col_ind <= ncol(sdf))
throw(ArgumentError("Cannot assign to non-existent column: $col_ind"))
Expand All @@ -219,15 +219,17 @@ function Base.setindex!(sdf::SubDataFrame, v::AbstractVector,
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
end
v = _preprocess_column(v, nrow(sdf), false)
sdf[:, col_ind] = v
else
pdf = parent(sdf)
p_col_ind = parentcols(index(sdf), col_ind)