`
linest
  • 浏览: 150784 次
  • 性别: Icon_minigender_1
  • 来自: 内蒙古
社区版块
存档分类
最新评论

读代码-Pattern和FrequentPatternMaxHeap

 
阅读更多
package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
public class Pattern implements Comparable<Pattern>

pattern封装了一组item,每个item的support值,整体的support值

  private int[] pattern;
  
  private long support = Long.MAX_VALUE;
  
  private long[] supportValues;



判断pattern是否包含。由于pattern已经是升序排列,两个数组比较推进即可。
包含有几个个条件:1.本数组长度要小外来 2.如果外来数组已经大于本数组当前值则不可能3.要么两个数组都达到末尾,要么外来数组没有到末尾
  public final boolean isSubPatternOf(Pattern frequentPattern) {
    int[] otherPattern = frequentPattern.getPattern();
    int otherLength = frequentPattern.length();
    if (this.length() > frequentPattern.length()) {
      return false;
    }
    int i = 0;
    int otherI = 0;
    while (i < length && otherI < otherLength) {
      if (otherPattern[otherI] == pattern[i]) {
        otherI++;
        i++;
      } else if (otherPattern[otherI] < pattern[i]) {
        otherI++;
      } else {
        return false;
      }
    }
    return otherI != otherLength || i == length;
  }


扩容。增大1.5倍再复制之前的。
  private void resize() {
    int size = (int) (GROWTH_RATE * length);
    if (size < DEFAULT_INITIAL_SIZE) {
      size = DEFAULT_INITIAL_SIZE;
    }
    int[] oldpattern = pattern;
    long[] oldSupport = supportValues;
    this.pattern = new int[size];
    this.supportValues = new long[size];
    System.arraycopy(oldpattern, 0, this.pattern, 0, length);
    System.arraycopy(oldSupport, 0, this.supportValues, 0, length);
  }



增加一个item,总support取所有item最小值
  public final void add(int id, long supportCount) {
    dirty = true;
    if (length >= pattern.length) {
      resize();
    }
    this.pattern[length] = id;
    this.supportValues[length++] = supportCount;
    this.support = supportCount > this.support ? this.support : supportCount;
  }




package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
public final class FrequentPatternMaxHeap

在堆中保存top k个pattern

核心结构
  private final PriorityQueue<Pattern> queue;


同一support值的pattern组成一个set
  private final OpenLongObjectHashMap<Set<Pattern>> patternIndex;




判断是否可添加
如果容量未满直接可添加,如果已满,若support值大于当前最小也可以添加
  public boolean addable(long support) {
    return count < maxSize || least.support() <= support;
  }



插入新pattern,如果堆满了,存在替换过程,如果有子pattern检查还要维护同support的集合。insert函数主要进行插入前的条件筛选,主要逻辑。
  public void insert(Pattern frequentPattern) {
    if (frequentPattern.length() == 0) {
      return;
    }
    
    if (count == maxSize) {
      if (frequentPattern.compareTo(least) > 0 && addPattern(frequentPattern)) {
        Pattern evictedItem = queue.poll();
        least = queue.peek();
        if (subPatternCheck) {
          patternIndex.get(evictedItem.support()).remove(evictedItem);
        }
      }
    } else {
      if (addPattern(frequentPattern)) {
        count++;
        if (least == null) {
          least = frequentPattern;
        } else {
          if (least.compareTo(frequentPattern) < 0) {
            least = frequentPattern;
          }
        }
      }
    }
  }




具体的插入过程。检查子集,如果堆中已经有超集则不插入。
  private boolean addPattern(Pattern frequentPattern) {
    if (subPatternCheck) {
      Long index = frequentPattern.support();
      if (patternIndex.containsKey(index)) {
        Set<Pattern> indexSet = patternIndex.get(index);
        boolean replace = false;
        Pattern replacablePattern = null;
        for (Pattern p : indexSet) {
          if (frequentPattern.isSubPatternOf(p)) {
            return false;
          } else if (p.isSubPatternOf(frequentPattern)) {
            replace = true;
            replacablePattern = p;
            break;
          }
        }
        if (replace) {
          indexSet.remove(replacablePattern);
          if (!indexSet.contains(frequentPattern) && queue.add(frequentPattern)) {
            indexSet.add(frequentPattern);
          }
          return false;
        }
        queue.add(frequentPattern);
        indexSet.add(frequentPattern);
      } else {
        queue.add(frequentPattern);
        Set<Pattern> patternList;
        if (!patternIndex.containsKey(index)) {
          patternList = new HashSet<Pattern>();
          patternIndex.put(index, patternList);
        }
        patternList = patternIndex.get(index);
        patternList.add(frequentPattern);
      }
    } else {
      queue.add(frequentPattern);
    }
    return true;
  }
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics