Monday, April 12, 2010

Largest rectangle in histogram problem

The problem is to find the largest rectangle under a histogram.

This question is the last question from 2003/2004 ACM International Collegiate Programming Contest, Univ. of Ulm Local Contest. It's one of the 2 hard ones in this question set. See the problem description and the solution.

The naive algorithm is for each column, scan all the other columns to find max rectangle. This is O(n^2). O(n*log(n)) solution exists, for example, using order statistic trees. O(n) solution also exists, for example, using a stack. See above link for the solutions.

The O(n) solution using a stack is not difficult to code. About 50 lines of C++ code will do. See below.
#include <iostream>
#include <stack>
using namespace std;

void getMax(int hist[], stack<int> * s, int right, int & max) {
  int i = s->top();
  int height = hist[i];
  s->pop();
  int left = (s->size() > 0) ? s->top() : -1;
  int area = height * (right - left);
  if (area > max) max = area;
}

void doHist(int hist[], int len) {
  stack<int> * s = new stack<int>;
  int i, max, top_v, right;

  max = 0;
  for (i = 0; i < len; i ++) {
    if (s->size() == 0) {
      s->push(i);
      continue;
    }

    top_v = hist[s->top()];
    if (hist[i] > top_v) {
      s->push(i);
    } else if (hist[i] < top_v) {
      while (s->size() > 0 && hist[i] < hist[s->top()]) {
        getMax(hist, s, i - 1, max);
      }
      s->push(i);
    }
  }

  while (s->size() > 0) getMax(hist, s, i - 1 , max);

  cout << "max = " << max << endl;
}

int main() {
  int hist[] = {3, 5, 4, 7, 6, 5, 2}; // answer: 20
  doHist(hist, sizeof(hist) / sizeof(int));
  return 0;
}

[5/28/2010]
The above implementation contains a bug. The following is updated version on 5/9/2010.
#include <iostream>
#include <sstream>
#include <stack>
using namespace std;

int DEBUG = 0;

//
// Note that if s->size() becomes 0, then left = start.
// This is useful when entry in hist[] can be zero.
// If all entries in hist[] are positive, then no need to use start,
// In that case, use -1 in place of start in this function.
//
void getMax(int hist[], stack<int> * s, 
            int newHeight, int right, int & max, int & start) {
  int height, left = 0, area;
  while (s->size() > 0 && hist[s->top()] > newHeight) {
    height = hist[s->top()];
    s->pop();
    left = (s->size() > 0) ? s->top() : start; 
    while (s->size() > 0 && hist[s->top()] == height) {
      s->pop();
      left = (s->size() > 0) ? s->top() : start; 
    }

    area = height * (right - left);
    if (DEBUG) printf("area: %d * (%d - %d) = %d\n", height, right, left, area);
    if (area > max) max = area;
  }
}

//
// Note that when hist[i] == top_v, we push i.
// In the acm judge site, it says skip i if equal. 
// But I feel somehow it can't keep track of the left value
// when multiple columns have the same height.
//
int doHist(int hist[], int len) {
  stack<int> * s = new stack<int>;
  int i, max, top_v;
  int start = -1; // the position before the last 0, used by left.

  max = 0;
  for (i = 0; i < len; i ++) {
    if (s->size() == 0) {
      s->push(i);
      continue;
    }

    top_v = hist[s->top()];
    if (hist[i] >= top_v) {
      s->push(i);
    } else if (hist[i] < top_v) {
      getMax(hist, s, hist[i], i - 1, max, start);
      s->push(i); 
      if (hist[i] == 0) start = i - 1;
    }
  }

  getMax(hist, s, 0, i - 1 , max, start);

  cout << "max = " << max << endl;
  return max;
}

int main() {
  int hist[] = {3, 5, 4, 7, 6, 5, 2}; // answer: 20
  doHist(hist, sizeof(hist) / sizeof(int));
  return 0;
}


[1/22/2013]
Found simpler and working solutions:
// Use stack. O(n). 
    int getArea(vector &h) {
        if (h.size() == 0) return 0;
        h.push_back(0);
        
        stack s;
        int i = 0, area = 0;
        
        while (i < h.size()) {
            if (s.empty() || h[s.top()] <= h[i]) {
                s.push(i ++); // if bar goes higher, push to stack.
            }
            else { // bar lower, pop and update area.
                int t = s.top();
                s.pop();
                area = max(area, h[t] * (s.empty() ? i : i - s.top() - 1));
            }
        }
        
        return area;
    }

// Binary search. O(n*log(n)). Call with getArea(height, 0, height.size() - 1).
    int getArea(int h[], int L, int R)
    {
        if (L > R) return 0;
    
        int minIndex = L;
        for (int i = L+1; i <= R; ++ i) {
            if (h[minIndex] > h[i]) minIndex = i;
        }
       
        int width = R - L + 1;
        int areaT = width * h[minIndex];
        int areaL = getArea(h, L, minIndex - 1);
        int areaR = getArea(h, minIndex + 1, R);
    
        return max(areaT, max(areaL, areaR));
    } 

9 comments:

Anonymous said...

I googled out your post. Good Implementation. But for this line:
if (hist[i] > top_v) {

I think for those with same height, we need to push them into stack too.
So, it should be:
if (hist[i] >= top_v) {

Try this data:
1,0,0,1,0,1,0,0,0,1

Tom said...

Thanks for the comment. I actually noticed this before when someone else pointed out this too. So I updated that on 5/9/2010 as shown above.

The following are the test cases I used (first column is result, following columns is input array):

25 5 5 5 5 5
15 5 5 5 0 5
5 5
25 5 5 5 10 5
20 3 5 4 7 6 5 2
9 5 4 3 2 1
9 1 2 3 4 5
18 0 1 1 2 2 3 3 4 4 5 5
18 5 5 4 4 3 3 2 2 1 1 0 0
20 5 5 4 4 3 3 2 2 1 1 0 0 0 1 1 2 2 3 3 4 4 5 5 0 3 5 4 7 6 5 2
31 5 5 4 4 3 3 2 2 1 1 1 1 1 1 1 2 2 3 3 4 4 5 5 1 3 5 4 7 6 5 2
8 7 2 1 4 5 1 3 3
4000 4 1000 1000 1000 1000
1 1 0 0 1 0 1 0 0 0 1

Anonymous said...

Perfect!!

Much much better than just changing the condition.

Learned a lot from your post!

Bookmarked!

Tom said...

Thanks. I'm glad it's useful.

Anonymous said...

I found some shorter realization of this idea:

int n;
int P[100100];
int S[100100];
int s_sz=0;

void sol()
{
long long ans=0;
s_sz=0;
P[0]=0;
P[n+1]=0;
for(int a=1; a<=n+1; a++)
{
while(s_sz && P[S[s_sz]]>=P[a])
{
ans = max(ans, (long long)P[S[s_sz]]*(long long)(a-S[s_sz-1]-1));
s_sz--;
}
S[++s_sz] = a;
}
WR("%lld\n", ans);
}

int main()
{
while(1)
{
scanf("%d", &n);
if (!n) break;
for(int a=1; a<=n; a++) scanf("%d", &P[a]);
sol();
}
return 0;
}


i tested this code there: http://www.spoj.pl/problems/HISTOGRA/

Tom said...

This indeed looks much shorter and simpler, using array only instead of stack. It seems to be using dynamic programming. I'll check it carefully when I have time. Thanks for the posting.

Unknown said...

Hi,
the code can be simplify by keeping in the stack a pair of values: the height of the rectangle and the left-most limit of the possibly bigger rectangle it belongs to. Height that equals the current top height will not be inserted. Also, there's no need handle zero height in any special way. Following is my code (the pstack class is just for convenience):

// Return max area of rectangle under histogram;
// hist contain heights, width of each rectangle in histogram
// considered to be 1
uint max_rect(uint *hist, int n) {
// For each "single" or "original" rectangle in the histogram,
// the stack holds its height and the left-most limit of a
// max rectangle that it belongs to; the right-most limit will
// be deduced. Then the area of the max rect containg that
// single rect will be height * (right_limit - left_limit)
pstack s;
uint max=0;

for(int i=0; i max) max = area;
left_limit = s.top_left_limit();
s.pop();
}
if(s.empty() || hist[i] > s.top_val())
s.push(left_limit, hist[i]);
}
// Finished going over the histogram; now calcualte the
// areas for remaining rects
while(! s.empty()) {
uint area = s.top_val() * (n - s.top_left_limit());
if(area > max) max = area;
s.pop();
}
return max;
}



class pstack {
public:
bool empty() { return s.empty(); }
void push(uint first, uint second) {
s.push(new pair(first,second));
}
void pop() { s.pop(); }
uint top_val() { return s.top()->second; }
uint top_left_limit() { return s.top()->first; }
private:
stack*> s;
};

Anonymous said...

my google interview question damn

soumen said...

My solution is given below. But it says wrong answer when submitted to spoj. Can anyone please point out where it goes wrong?



#include

#define MAXELEMENT 100001

typedef struct{
int left;
unsigned long height;
}stack;

int stackEmpty();
stack stackTop();
stack pop();
void push(stack n);
void maxRect(int n);
void input();

unsigned long height[MAXELEMENT];
stack hist[MAXELEMENT];
int top;

int main(){
int n, i;

while(1){
scanf("%d", &n);

if(n == 0)
break;

for(i = 0; i < n; i++)

scanf("%lu", &height[i]);

height[n] = 0; // to pop last elements remaining int stack


top = -1;

maxRect(n);

}

return 0;

}

int stackEmpty(){

return (top == -1) ? 1 : 0;

}

stack stackTop(){

return hist[top];

}

stack pop(){

return hist[top--];

}

void push(stack n){

hist[++top] = n;

}

void maxRect(int n){

int i, start;

unsigned long long temp, max = 0;

for(i = 0; i <= n; i++){

start = i;

while(!stackEmpty() && height[i] < stackTop().height){

temp = (i - stackTop().left) * stackTop().height;

if(temp > max)

max = temp;



start = stackTop().left;

pop();

}

stack ele;

ele.left = start;

ele.height = height[i];

push(ele);

}
printf("%llu\n", max);

}

Blog Archive

Followers