Skip to content

Commit

Permalink
Merge pull request #153 from rokostik/spreadsheet-improvements
Browse files Browse the repository at this point in the history
Add spreadsheet improvements
  • Loading branch information
refaktor committed Mar 8, 2024
2 parents d7c0aa0 + 5dac120 commit ffb2521
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 37 deletions.
214 changes: 190 additions & 24 deletions evaldo/builtins_spreadsheet.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ package evaldo

import (
"encoding/csv"
"fmt"
"os"
"regexp"
"slices"
"sort"
"strconv"
"strings"

"github.com/refaktor/rye/env"
)
Expand Down Expand Up @@ -199,13 +200,65 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
case env.String:
return WhereEquals(ps, spr, col.Value, arg2)
default:
return MakeArgError(ps, 2, []env.Type{env.WordType}, "where-equal")
return MakeArgError(ps, 2, []env.Type{env.WordType, env.StringType}, "where-equal")
}
default:
return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "where-equal")
}
},
},
"where-match": {
	Argsn: 3,
	Doc:   "Returns spreadsheet of rows where a specific colum matches a regex.",
	// Arguments: (1) spreadsheet, (2) column given as a word or string,
	// (3) native value wrapping a compiled *regexp.Regexp.
	Fn: func(ps *env.ProgramState, arg0 env.Object, arg1 env.Object, arg2 env.Object, arg3 env.Object, arg4 env.Object) (res env.Object) {
		switch spr := arg0.(type) {
		case env.Spreadsheet:
			switch reNative := arg2.(type) {
			case env.Native:
				re, ok := reNative.Value.(*regexp.Regexp)
				if !ok {
					// The regex is the third argument, so a native that does
					// not wrap a *regexp.Regexp is reported at position 3
					// (same position as the non-native case below).
					return MakeArgError(ps, 3, []env.Type{env.NativeType}, "where-match")
				}
				switch col := arg1.(type) {
				case env.Word:
					// Words are resolved to their textual name via the index.
					return WhereMatch(ps, spr, ps.Idx.GetWord(col.Index), re)
				case env.String:
					return WhereMatch(ps, spr, col.Value, re)
				default:
					return MakeArgError(ps, 2, []env.Type{env.WordType, env.StringType}, "where-match")
				}
			default:
				return MakeArgError(ps, 3, []env.Type{env.NativeType}, "where-match")
			}
		default:
			return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "where-match")
		}
	},
},
"where-contains": {
	Argsn: 3,
	Doc:   "Returns spreadsheet of rows where specific colum contains a given string value.",
	// Arguments: (1) spreadsheet, (2) column given as a word or string,
	// (3) the substring to look for.
	Fn: func(ps *env.ProgramState, arg0 env.Object, arg1 env.Object, arg2 env.Object, arg3 env.Object, arg4 env.Object) (res env.Object) {
		// Validate the spreadsheet first, then the substring, then the column,
		// so errors are reported in the same priority order as before.
		sheet, isSheet := arg0.(env.Spreadsheet)
		if !isSheet {
			return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "where-contains")
		}
		needle, isString := arg2.(env.String)
		if !isString {
			return MakeArgError(ps, 3, []env.Type{env.StringType}, "where-contains")
		}
		switch column := arg1.(type) {
		case env.Word:
			// Words are resolved to their textual name via the index.
			return WhereContains(ps, sheet, ps.Idx.GetWord(column.Index), needle.Value)
		case env.String:
			return WhereContains(ps, sheet, column.Value, needle.Value)
		}
		return MakeArgError(ps, 2, []env.Type{env.WordType, env.StringType}, "where-contains")
	},
},
"where-greater": {
Argsn: 3,
Doc: "Returns spreadsheet of rows where specific colum is greater than given value.",
Expand All @@ -218,7 +271,7 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
case env.String:
return WhereGreater(ps, spr, col.Value, arg2)
default:
return MakeArgError(ps, 2, []env.Type{env.WordType}, "where-greater")
return MakeArgError(ps, 2, []env.Type{env.WordType, env.StringType}, "where-greater")
}
default:
return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "where-greater")
Expand All @@ -237,13 +290,32 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
case env.String:
return WhereLesser(ps, spr, col.Value, arg2)
default:
return MakeArgError(ps, 2, []env.Type{env.WordType}, "where-lesser")
return MakeArgError(ps, 2, []env.Type{env.WordType, env.StringType}, "where-lesser")
}
default:
return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "where-lesser")
}
},
},
"where-between": {
	Argsn: 4,
	Doc:   "Returns spreadsheet of rows where specific colum is between given values.",
	// Arguments: (1) spreadsheet, (2) column given as a word or string,
	// (3) and (4) the two bounds, passed through to WhereBetween unchecked.
	Fn: func(ps *env.ProgramState, arg0 env.Object, arg1 env.Object, arg2 env.Object, arg3 env.Object, arg4 env.Object) (res env.Object) {
		sheet, isSheet := arg0.(env.Spreadsheet)
		if !isSheet {
			return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "where-between")
		}
		switch column := arg1.(type) {
		case env.Word:
			// Words are resolved to their textual name via the index.
			return WhereBetween(ps, sheet, ps.Idx.GetWord(column.Index), arg2, arg3)
		case env.String:
			return WhereBetween(ps, sheet, column.Value, arg2, arg3)
		}
		return MakeArgError(ps, 2, []env.Type{env.WordType, env.StringType}, "where-between")
	},
},
"limit": {
Argsn: 2,
Doc: "Returns spreadsheet with number of rows limited to second argument.",
Expand Down Expand Up @@ -361,16 +433,38 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
Fn: func(ps *env.ProgramState, arg0 env.Object, arg1 env.Object, arg2 env.Object, arg3 env.Object, arg4 env.Object) env.Object {
switch spr := arg0.(type) {
case env.Spreadsheet:
switch name := arg1.(type) {
switch newCol := arg1.(type) {
case env.Word:
switch extract := arg2.(type) {
switch fromCols := arg2.(type) {
case env.Block:
switch code := arg3.(type) {
case env.Block:
return GenerateColumn(ps, spr, name, extract, code)
return GenerateColumn(ps, spr, newCol, fromCols, code)
default:
return MakeArgError(ps, 4, []env.Type{env.BlockType}, "add-col!")
}
case env.Word:
switch replaceBlock := arg3.(type) {
case env.Block:
if replaceBlock.Series.Len() != 2 {
return MakeBuiltinError(ps, "Replacement block must contain a regex object and replacement string.", "add-col!")
}
regexNative, ok := replaceBlock.Series.S[0].(env.Native)
if !ok {
return MakeBuiltinError(ps, "First element of replacement block must be a regex object.", "add-col!")
}
regex, ok := regexNative.Value.(*regexp.Regexp)
if !ok {
return MakeBuiltinError(ps, "First element of replacement block must be a regex object.", "add-col!")
}
replaceStr, ok := replaceBlock.Series.S[1].(env.String)
if !ok {
return MakeBuiltinError(ps, "Second element of replacement block must be a string.", "add-col!")
}
return GenerateColumnRegexReplace(ps, spr, newCol, fromCols, regex, replaceStr.Value)
default:
return MakeArgError(ps, 3, []env.Type{env.BlockType}, "add-col!")
}
default:
return MakeArgError(ps, 3, []env.Type{env.BlockType}, "add-col!")
}
Expand Down Expand Up @@ -468,6 +562,31 @@ func GenerateColumn(ps *env.ProgramState, s env.Spreadsheet, name env.Word, extr
return s
}

// GenerateColumnRegexReplace appends a column called name to s. Each row's new
// cell is produced by running re.ReplaceAllString over the row's value in
// fromColName, with pattern used as the replacement text.
//
// If a row's source value cannot be retrieved, an error object naming the row
// index is returned. Source values that are not env.String produce an empty
// plain Go string in the new column. On success the updated spreadsheet value
// is returned.
func GenerateColumnRegexReplace(ps *env.ProgramState, s env.Spreadsheet, name env.Word, fromColName env.Word, re *regexp.Regexp, pattern string) env.Object {
	// Register the new column's name in the header.
	s.Cols = append(s.Cols, ps.Idx.GetWord(name.Index))
	for i := range s.Rows {
		current := s.Rows[i]
		// Look up this row's value in the source column.
		source, err := s.GetRowValue(ps.Idx.GetWord(fromColName.Index), current)
		if err != nil {
			return MakeError(ps, "Couldn't retrieve value at row "+strconv.Itoa(i))
		}

		// Default for non-string sources; overwritten when the source is a string.
		var cell any = ""
		if str, isStr := source.(env.String); isStr {
			cell = env.NewString(re.ReplaceAllString(str.Value, pattern))
		}

		// Append the computed cell and store the extended row back.
		current.Values = append(current.Values, cell)
		s.Rows[i] = current
	}
	return s
}

func AddIndexes(ps *env.ProgramState, s *env.Spreadsheet, columns []env.Word) env.Object {
s.Indexes = make(map[string]map[any][]int, 0)
for _, column := range columns {
Expand Down Expand Up @@ -509,9 +628,7 @@ func SortByColumnDesc(ps *env.ProgramState, s *env.Spreadsheet, name string) {
sort.Slice(s.Rows, compareCol)
}

func WhereEquals(ps *env.ProgramState, s env.Spreadsheet, name string, val any) env.Object {
fmt.Println(s.Cols)
fmt.Println(name)
func WhereEquals(ps *env.ProgramState, s env.Spreadsheet, name string, val env.Object) env.Object {
idx := slices.Index(s.Cols, name)
nspr := env.NewSpreadsheet(s.Cols)
if idx > -1 {
Expand All @@ -523,11 +640,8 @@ func WhereEquals(ps *env.ProgramState, s env.Spreadsheet, name string, val any)
} else {
for _, row := range s.Rows {
if len(row.Values) > idx {
switch val2 := val.(type) {
case env.Object:
if val2.Equal(env.ToRyeValue(row.Values[idx])) {
nspr.AddRow(row)
}
if val.Equal(env.ToRyeValue(row.Values[idx])) {
nspr.AddRow(row)
}
}
}
Expand All @@ -538,46 +652,98 @@ func WhereEquals(ps *env.ProgramState, s env.Spreadsheet, name string, val any)
}
}

func WhereGreater(ps *env.ProgramState, s env.Spreadsheet, name string, val any) env.Object {
func WhereMatch(ps *env.ProgramState, s env.Spreadsheet, name string, r *regexp.Regexp) env.Object {
idx := slices.Index(s.Cols, name)
nspr := env.NewSpreadsheet(s.Cols)
if idx > -1 {
for _, row := range s.Rows {
if len(row.Values) > idx {
switch val2 := val.(type) {
case env.Object:
if greaterThanNew(row.Values[idx].(env.Object), val2) {
rv := row.Values[idx]
if rvStr, ok := rv.(env.String); ok {
if r.MatchString(rvStr.Value) {
nspr.AddRow(row)
}
}
}
}
return *nspr
} else {
return MakeBuiltinError(ps, "Column not found.", "WhereGreater")
return MakeBuiltinError(ps, "Column not found.", "WhereMatch")
}
}

func WhereLesser(ps *env.ProgramState, s env.Spreadsheet, name string, val any) env.Object {
func WhereContains(ps *env.ProgramState, s env.Spreadsheet, name string, val string) env.Object {
idx := slices.Index(s.Cols, name)
nspr := env.NewSpreadsheet(s.Cols)
if idx > -1 {
for _, row := range s.Rows {
if len(row.Values) > idx {
switch val2 := val.(type) {
case env.Object:
if lesserThanNew(row.Values[idx].(env.Object), val2) {
rv := row.Values[idx]
if rvStr, ok := rv.(env.String); ok {
if strings.Contains(rvStr.Value, val) {
nspr.AddRow(row)
}
}
}
}
return *nspr
} else {
return MakeBuiltinError(ps, "Column not found.", "WhereMatch")
}
}

// WhereGreater returns a new spreadsheet containing the rows of s whose value
// in column name is greater than val, as decided by greaterThanNew.
//
// Rows too short to contain the column are skipped. If the column does not
// exist in s.Cols, a builtin error is returned instead.
func WhereGreater(ps *env.ProgramState, s env.Spreadsheet, name string, val env.Object) env.Object {
	idx := slices.Index(s.Cols, name)
	if idx < 0 {
		return MakeBuiltinError(ps, "Column not found.", "WhereGreater")
	}
	nspr := env.NewSpreadsheet(s.Cols)
	for _, row := range s.Rows {
		if len(row.Values) <= idx {
			continue
		}
		// Convert through env.ToRyeValue (as WhereEquals does) instead of
		// asserting env.Object directly: cells may hold plain Go values, and
		// an unchecked type assertion on those would panic.
		if greaterThanNew(env.ToRyeValue(row.Values[idx]), val) {
			nspr.AddRow(row)
		}
	}
	return *nspr
}

// WhereLesser returns a new spreadsheet containing the rows of s whose value
// in column name is lesser than val, as decided by lesserThanNew.
//
// Rows too short to contain the column are skipped. If the column does not
// exist in s.Cols, a builtin error is returned instead.
func WhereLesser(ps *env.ProgramState, s env.Spreadsheet, name string, val env.Object) env.Object {
	idx := slices.Index(s.Cols, name)
	if idx < 0 {
		// Report this function's own name; the label previously said
		// "WhereGreater", a copy-paste leftover.
		return MakeBuiltinError(ps, "Column not found.", "WhereLesser")
	}
	nspr := env.NewSpreadsheet(s.Cols)
	for _, row := range s.Rows {
		if len(row.Values) <= idx {
			continue
		}
		// Convert through env.ToRyeValue (as WhereEquals does) instead of
		// asserting env.Object directly: cells may hold plain Go values, and
		// an unchecked type assertion on those would panic.
		if lesserThanNew(env.ToRyeValue(row.Values[idx]), val) {
			nspr.AddRow(row)
		}
	}
	return *nspr
}

// WhereBetween returns a new spreadsheet containing the rows of s whose value
// in column name is strictly between val1 and val2: greater than val1 per
// greaterThanNew and lesser than val2 per lesserThanNew.
//
// Rows too short to contain the column are skipped. If the column does not
// exist in s.Cols, a builtin error is returned instead.
func WhereBetween(ps *env.ProgramState, s env.Spreadsheet, name string, val1 env.Object, val2 env.Object) env.Object {
	idx := slices.Index(s.Cols, name)
	if idx < 0 {
		return MakeBuiltinError(ps, "Column not found.", "WhereBetween")
	}
	nspr := env.NewSpreadsheet(s.Cols)
	for _, row := range s.Rows {
		if len(row.Values) <= idx {
			continue
		}
		// Convert through env.ToRyeValue (as WhereEquals does) instead of
		// asserting env.Object directly: cells may hold plain Go values, and
		// an unchecked type assertion on those would panic.
		rv := env.ToRyeValue(row.Values[idx])
		if greaterThanNew(rv, val1) && lesserThanNew(rv, val2) {
			nspr.AddRow(row)
		}
	}
	return *nspr
}

func Limit(ps *env.ProgramState, s env.Spreadsheet, n int) env.Object {
nspr := env.NewSpreadsheet(s.Cols)
nspr.Rows = s.Rows[0:n]
Expand Down
22 changes: 11 additions & 11 deletions examples/spreadsheet/data.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
ID,Name,Department,Years at Company
1,John Doe,Marketing,2
2,Jane Smith,Engineering,5
3,Robert Johnson,Human Resources,1
4,Julia Davis,Sales,3
5,James Brown,Engineering,2
6,Mary Williams,Marketing,4
7,Michael Miller,Human Resources,3
8,Linda Wilson,Sales,1
9,William Moore,Engineering,2
10,Elizabeth Taylor,Marketing,3
ID,Name,Department,Years at Company,DOB
1,John Doe,Marketing,2,1985-01-01
2,Jane Smith,Engineering,5,1988-12-31
3,Robert Johnson,Human Resources,1,1990-05-15
4,Julia Davis,Sales,3,1987-07-04
5,James Brown,Engineering,2,1989-11-11
6,Mary Williams,Marketing,4,1986-03-22
7,Michael Miller,Human Resources,3,1987-09-30
8,Linda Wilson,Sales,1,1991-02-28
9,William Moore,Engineering,2,1989-10-10
10,Elizabeth Taylor,Marketing,3,1988-04-01
9 changes: 8 additions & 1 deletion examples/spreadsheet/file.rye
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,11 @@ spr: load\csv file://data.csv

spr .add-index! { Name } :spr

spr .where-equal 'Name "John Doe"
spr .where-equal 'Name "John Doe"

; two examples of how to add a new column based on a regular expression replace of an existing column
; the first uses the more general .add-col! functionality,
; the second uses the specific replace functionality and is faster
r: regexp "(\d{4})-(\d{2})-(\d{2})"
spr .add-col! 'DOB-SLO { DOB } { r .replace-all DOB "$3.$2.$1" }
spr .add-col! 'DOB-SLO2 'DOB [ r "$3.$2.$1" ]
5 changes: 4 additions & 1 deletion tests/structures.rye
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,11 @@ section "Spreadsheet related functions"
equal\todo { spr -> 2 -> 'name } "Enya"
equal { spr .first -> "name" } "Enno"
equal { spr .where-equal 'name "Enya" |length? } 1
equal { spr .where-contains 'name "En" |length? } 2
equal { spr .where-match 'name regexp "En.." |length? } 2
equal { spr .where-greater 'weight 140 |length? } 1
equal { spr .where-lesser 'weight 130 |length? } 2

equal { spr .where-between 'weight 130 170 |length? } 2
}

group "autotype"
Expand Down

0 comments on commit ffb2521

Please sign in to comment.