Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sum of top k items in a steam #21

Open
ishu9bansal opened this issue Jan 22, 2023 · 3 comments
Open

sum of top k items in a steam #21

ishu9bansal opened this issue Jan 22, 2023 · 3 comments

Comments

@ishu9bansal
Copy link
Owner

It will be implemented by a priority queue of size k.

Example use case https://leetcode.com/contest/biweekly-contest-93/problems/maximum-star-sum-of-a-graph/

@ishu9bansal
Copy link
Owner Author

aggregation of top k elements in a stream or a floating window, with different flexibility.
It is possible in O(n) where n is the size of total iteration. With query and updates in O(1) average during the iteration.
This impl achieves the query and updates in O(log n)
https://leetcode.com/submissions/detail/1158279622/

class topK{
private:
    multiset<int> l,r;
    int k;
    long long sum;
public:
    topK(int K=0) : k(K), sum(0) {
        if(k<0) k = 0;
    }
    long long get() {
        return sum;
    }
    void add(int x) {
        l.insert(-x);   // trick for sorting in descending order
        sum += x;   // update aggregator
        int y;
        while(l.size()>k){  // maintain container size
            y = *l.begin();
            sum += y;   // adjust aggregator before removal
            l.erase(l.begin()); // remove extra and move it to other container
            r.insert(-y);    // reversing again to sort in ascending order
        }
    }
    void remove(int x) {
        auto it = l.find(-x);   // find in primary container
        if(it!=l.end()){    // if found
            int y = *it;
            sum += y;   // adjust aggregator before removal
            l.erase(it);    // remove the required and expect from other container
            while(l.size()<k && r.size()>0) {   // container size check
                y = *r.begin(); // get from secondary container
                l.insert(-y);   // insert in primary container
                sum += y;       // update aggregator
                r.erase(r.begin()); // erase from secondary container
            }
        } else {    // if not found in primary search and remove from secondary
            auto jt = r.find(x);
            if(jt!=r.end()) r.erase(jt);
        }
    }
    int expectation() {
        // this number tells that how many elements short we are to complete the k elements
        // if number is negative, it means that we overshoot and want to drop a few elements
        return k-l.size()-r.size();
    }
};

@ishu9bansal
Copy link
Owner Author

ishu9bansal commented Jan 27, 2024

https://leetcode.com/problems/divide-an-array-into-subarrays-with-minimum-cost-ii/submissions/1158327361/

abstraction of the aggregation methods

template <class T>
class aggregator{
protected:
    T aggregate;
public:
    virtual void add(int, int) = 0;
    virtual void remove(int, int) = 0;
    T get() {
        return aggregate;
    }
};

class Sum: public aggregator<long long> {
public:
    Sum() {
        this->aggregate = 0;
    }
    void add(int x, int bound) {
        aggregate += x;
    }
    void remove(int x, int bound) {
        aggregate -= x;
    }
};

class Count: public aggregator<int> {
public:
    Count() {
        this->aggregate = 0;
    }
    void add(int x, int bound) {
        aggregate++;
    }
    void remove(int x, int bound) {
        aggregate--;
    }
};

class Max: public aggregator<int> {
public:
    Max() {
        this->aggregate = INT_MIN;
    }
    void add(int x, int bound) {
        aggregate = bound;
    }
    void remove(int x, int bound) {
        aggregate = bound;
    }
};
class topK{
private:
    multiset<int> l,r;
    int k;
    Sum sum;
    Count count;
    Max max;
    void aggregateAdd(int x) {
        int bound = 0;
        if(l.size())  bound = -*l.begin();
        sum.add(x,bound);
        count.add(x,bound);
        if(l.empty())   bound = INT_MIN;
        max.add(x,bound);
    }
    void aggregateRemove(int x) {
        int bound = 0;
        if(l.size())  bound = -*l.begin();
        sum.remove(x,bound);
        count.remove(x,bound);
        if(l.empty())   bound = INT_MIN;
        max.remove(x,bound);
    }
public:
    topK(int K=0) : k(K) {
        if(k<0) k = 0;
    }
    long long getSum() {
        return sum.get();
    }
    int getCount() {
        return count.get();
    }
    int getMax() {
        return max.get();
    }
    void add(int x) {
        l.insert(-x);   // trick for sorting in descending order
        aggregateAdd(x);   // update aggregator, positioning is important!
        int y;
        while(l.size()>k){  // maintain container size
            y = -*l.begin();
            l.erase(l.begin()); // remove extra and move it to other container
            aggregateRemove(y);   // adjust aggregator after removal, positioning is important!
            r.insert(y);    // reversing again to sort in ascending order
        }
    }
    void remove(int x) {
        auto it = l.find(-x);   // find in primary container
        if(it!=l.end()){    // if found
            int y = -*it;
            l.erase(it);    // remove the required and expect from other container
            aggregateRemove(y);   // adjust aggregator after removal, positioning is important!
            while(l.size()<k && r.size()>0) {   // container size check
                y = *r.begin(); // get from secondary container
                l.insert(-y);   // insert in primary container
                aggregateAdd(y);       // update aggregator, positioning is important!
                r.erase(r.begin()); // erase from secondary container
            }
        } else {    // if not found in primary search and remove from secondary
            auto jt = r.find(x);
            if(jt!=r.end()) r.erase(jt);
        }
    }
};

@ishu9bansal
Copy link
Owner Author

Possible improvements

  • Interpretation of k=0?
  • Stream of numbers with top k limit, add option to make right - r set optional so simulate a stream (we don't need remove method in that case)
  • Add possibility of adding more aggregations, custom aggregations.
  • Improve the aggregation plug and play.
  • Major improvement, bring the O(n) version. Convert this to queue based implementation
  • Flexibility of sort direction
  • Flexibility of other data classes, why only int?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant