📜  使用 STL 的运行整数流的中位数

📅  最后修改于: 2021-10-28 01:53:53             🧑  作者: Mango

鉴于正在从数据流中读取整数。从第一个整数到最后一个整数,找出到目前为止读取的所有元素的中位数。这也称为运行整数的中位数。数据流可以是任何数据源,例如文件、整数数组、输入流等。

什么是中位数?

中位数可以定义为数据集中将数据样本的上半部分与下半部分分开的元素。换句话说,它是排序后数据的中间元素:当输入大小为奇数时,取排序数据的中间元素;当输入大小为偶数时,取排序数据中间两个元素的平均值。
例子:对于数据流 5, 15, 10, 20, 3,依次读入每个元素后的中位数分别为 5, 10, 10, 12.5, 10。

做法:思路是使用最大堆和最小堆来存储上半部分和下半部分的元素。最大堆和最小堆可以使用 C++ STL 中的 priority_queue 来实现。下面是解决这个问题的分步算法。
算法:

  1. 创建两个堆。在任何时间点,一个最大堆用于维护下半部分的元素,一个最小堆用于维护上半部分的元素。
  2. 将中位数的初始值取为第一个元素,并把该元素插入最大堆。
  3. 对于每个新读取的元素,将其插入到最大堆或最小堆中,并根据以下条件计算中值:
    • 如果最大堆的大小大于最小堆的大小:若新元素小于之前的中位数,则从最大堆中弹出顶部元素并插入最小堆,再将新元素插入最大堆;否则将新元素插入最小堆。新的中位数为最大堆和最小堆顶部元素的平均值。
    • 如果最大堆的大小小于最小堆的大小:若新元素大于之前的中位数,则从最小堆中弹出顶部元素并插入最大堆,再将新元素插入最小堆;否则将新元素插入最大堆。新的中位数为最大堆和最小堆顶部元素的平均值。
    • 如果两个堆的大小相同,则检查当前元素是否小于之前的中位数。如果当前元素小于之前的中位数,则将其插入最大堆,新的中位数等于最大堆的顶部元素;否则(当前元素大于或等于之前的中位数)将其插入最小堆,新的中位数等于最小堆的顶部元素。

下面是上述方法的实现。

C++
// C++ program to find med in
// stream of running integers
#include <functional>
#include <iostream>
#include <queue>
#include <vector>
using namespace std;
 
// Prints the running median of a stream, one value per line on std::cout.
//
// arr : the stream values, read in order from arr[0] to arr[n-1]
// n   : number of values in arr; nothing is printed when n <= 0
//
// After reading each value the median of everything read so far is printed.
// Two heaps are maintained whose sizes never differ by more than one:
//   s - max heap holding the smaller half of the values
//   g - min heap holding the greater half of the values
// When the heaps are the same size the median is the average of both tops;
// otherwise it is the top of the larger heap.
void printMedians(double arr[], int n)
{
    // An empty stream has no median; this also avoids reading arr[0].
    if (n <= 0)
        return;

    // max heap to store the smaller half elements
    std::priority_queue<double> s;

    // min heap to store the greater half elements
    std::priority_queue<double, std::vector<double>, std::greater<double> > g;

    // The first value is its own median and seeds the smaller half.
    double med = arr[0];
    s.push(arr[0]);

    std::cout << med << std::endl;

    // reading the remaining elements of the stream one by one
    for (int i = 1; i < n; i++)
    {
        double x = arr[i];

        // case1(left side heap has more elements): med is s.top()
        if (s.size() > g.size())
        {
            if (x < med)
            {
                // x belongs to the smaller half, so hand the current
                // maximum of that half over to the greater half first.
                g.push(s.top());
                s.pop();
                s.push(x);
            }
            else
                g.push(x);

            // heaps are balanced again
            med = (s.top() + g.top()) / 2.0;
        }

        // case2(both heaps are balanced): the heap that receives x
        // becomes the larger one and its top is the new median
        else if (s.size() == g.size())
        {
            if (x < med)
            {
                s.push(x);
                med = s.top();
            }
            else
            {
                g.push(x);
                med = g.top();
            }
        }

        // case3(right side heap has more elements): med is g.top()
        else
        {
            if (x > med)
            {
                // x belongs to the greater half, so hand the current
                // minimum of that half over to the smaller half first.
                s.push(g.top());
                g.pop();
                g.push(x);
            }
            else
                s.push(x);

            // heaps are balanced again
            med = (s.top() + g.top()) / 2.0;
        }

        std::cout << med << std::endl;
    }
}
 
// Driver program to test above functions
int main()
{
    // stream of integers
    double arr[] = {5, 15, 10, 20, 3};
    int n = sizeof(arr)/sizeof(arr[0]);
    printMedians(arr, n);
    return 0;
}


Java
// Java program to find med in
// stream of running integers
import java.util.Collections;
import java.util.PriorityQueue;
 
/**
 * Prints the running median of a stream of integers using two heaps.
 *
 * <p>A max heap keeps the smaller half of the values read so far and a min
 * heap keeps the greater half. Their sizes never differ by more than one, so
 * the median is either the top of the larger heap or the average of both tops.
 */
public class MedianMaintain
{

    /**
     * Averages two heap tops in floating point.
     *
     * <p>The sum is formed as a {@code double} so that two large {@code int}
     * values cannot overflow before the division.
     */
    private static double average(int lower, int upper)
    {
        return ((double) lower + upper) / 2;
    }

    /**
     * Prints, one per line on {@code System.out}, the median of the values
     * read so far after each element of {@code a} is consumed in order.
     *
     * @param a the stream of integers; an empty array prints nothing
     */
    public static void printMedian(int[] a)
    {
        // An empty stream has no median; this also avoids reading a[0].
        if (a.length == 0)
        {
            return;
        }

        // max heap to store the smaller half elements
        PriorityQueue<Integer> smaller =
            new PriorityQueue<>(Collections.reverseOrder());

        // min-heap to store the greater half elements
        PriorityQueue<Integer> greater = new PriorityQueue<>();

        // The first value is its own median and seeds the smaller half.
        double med = a[0];
        smaller.add(a[0]);
        System.out.println(med);

        // reading the remaining elements of the stream one by one
        for (int i = 1; i < a.length; i++)
        {
            int x = a[i];

            // case1(left side heap has more elements): med is its top
            if (smaller.size() > greater.size())
            {
                if (x < med)
                {
                    // x belongs to the smaller half; move that half's
                    // maximum across first so the sizes end up equal.
                    greater.add(smaller.remove());
                    smaller.add(x);
                }
                else
                {
                    greater.add(x);
                }
                med = average(smaller.peek(), greater.peek());
            }

            // case2(both heaps are balanced): the heap that receives x
            // becomes the larger one and its top is the new median
            else if (smaller.size() == greater.size())
            {
                if (x < med)
                {
                    smaller.add(x);
                    med = smaller.peek();
                }
                else
                {
                    greater.add(x);
                    med = greater.peek();
                }
            }

            // case3(right side heap has more elements): med is its top
            else
            {
                if (x > med)
                {
                    // x belongs to the greater half; move that half's
                    // minimum across first so the sizes end up equal.
                    smaller.add(greater.remove());
                    greater.add(x);
                }
                else
                {
                    smaller.add(x);
                }
                med = average(smaller.peek(), greater.peek());
            }
            System.out.println(med);
        }
    }

    // Driver code
    public static void main(String[] args)
    {
        // stream of integers
        int[] arr = new int[]{5, 15, 10, 20, 3};
        printMedian(arr);
    }
}
 
// This code is contributed by Kaustav kumar Chanda.


C#
// C# program to find med in
// stream of running integers
using System;
using System.Collections.Generic;
// Prints the running median of a stream of integers.
//
// Two ascending-sorted lists stand in for the heaps: "smaller" holds the
// lower half (its maximum is the last element) and "greater" holds the upper
// half (its minimum is the first element). Their sizes never differ by more
// than one, so the median is either the boundary element of the larger list
// or the average of both boundary elements.
public class MedianMaintain
{

  // Inserts value into an ascending-sorted list at its ordered position,
  // so the list never needs a full re-sort.
  private static void InsertSorted(List<int> sorted, int value)
  {
    int pos = sorted.BinarySearch(value);

    // A negative result is the bitwise complement of the insertion index.
    if (pos < 0)
      pos = ~pos;
    sorted.Insert(pos, value);
  }

  // Averages two values in floating point; the sum is formed as a double
  // so two large ints cannot overflow before the division.
  private static double Average(int lower, int upper)
  {
    return ((double)lower + upper) / 2;
  }

  // method to calculate med of stream: after each element of a is read,
  // writes the median of the values read so far on its own line.
  // An empty array writes nothing.
  public static void printMedian(int[] a)
  {
    // An empty stream has no median; this also avoids reading a[0].
    if (a.Length == 0)
      return;

    double med = a[0];

    // smaller half, ascending (maximum at the end)
    List<int> smaller = new List<int>();

    // greater half, ascending (minimum at the front)
    List<int> greater = new List<int>();

    // The first value is its own median and seeds the smaller half.
    smaller.Add(a[0]);
    Console.WriteLine(med);

    // reading the remaining elements of the stream one by one
    for(int i = 1; i < a.Length; i++)
    {

      int x = a[i];

      // case1(left side has more elements): med is its maximum
      if(smaller.Count > greater.Count)
      {
        if(x < med)
        {
          // x belongs to the smaller half; move that half's
          // maximum across first so the sizes end up equal.
          int top = smaller[smaller.Count - 1];
          smaller.RemoveAt(smaller.Count - 1);
          InsertSorted(greater, top);
          InsertSorted(smaller, x);
        }
        else
          InsertSorted(greater, x);
        med = Average(smaller[smaller.Count - 1], greater[0]);
      }

      // case2(both sides are balanced): the side that receives x
      // becomes the larger one and its boundary is the new median
      else if(smaller.Count == greater.Count)
      {
        if(x < med)
        {
          InsertSorted(smaller, x);
          med = (double)smaller[smaller.Count - 1];
        }
        else
        {
          InsertSorted(greater, x);
          med = (double)greater[0];
        }
      }

      // case3(right side has more elements): med is its minimum
      else
      {
        if(x > med)
        {
          // x belongs to the greater half; move that half's
          // minimum across first so the sizes end up equal.
          int bottom = greater[0];
          greater.RemoveAt(0);
          InsertSorted(smaller, bottom);
          InsertSorted(greater, x);
        }
        else
          InsertSorted(smaller, x);
        med = Average(smaller[smaller.Count - 1], greater[0]);

      }
      Console.WriteLine(med);
    }
  }

  // Driver code
  public static void Main(String []args)
  {

    // stream of integers
    int[] arr = new int[]{5, 15, 10, 20, 3};
    printMedian(arr);
  }
}
 
// This code is contributed by Rajput-Ji


Javascript


输出:
5
10
10
12.5
10

复杂度分析:

  • 时间复杂度: O(n Log n)。
    在最小堆中插入元素的时间复杂度为 log n。所以插入n个元素是O(n log n)。
  • 辅助空间: O(n)。
    在堆中存储元素所需的空间是 O(n)。

如果您希望与专家一起参加现场课程,请参阅DSA 现场工作专业课程学生竞争性编程现场课程