缓存淘汰算法 (LRU、LFU)

介绍

LRU（Least Recently Used）和LFU（Least Frequently Used）是两种常见的缓存淘汰算法，用于在缓存空间有限的情况下选择合适的缓存对象进行淘汰，以提高缓存的利用效率。

LRU算法基于”最近最少使用”的原则进行淘汰。它维护一个缓存的访问顺序链表，当有新的数据被访问时，如果数据已经在缓存中，则将其移到链表头部；如果数据不在缓存中，则将其添加到链表头部。当需要淘汰数据时，选择链表尾部的数据进行淘汰，因为尾部的数据是最近最少被访问的数据。

LFU算法基于”最不经常使用”的原则进行淘汰。它维护一个缓存对象的访问频次，对于每个访问到的对象，增加其访问频次。当需要淘汰数据时，选择访问频次最低的数据进行淘汰。

LRU和LFU算法都有各自的优势和适用场景：

LRU算法适用于访问具有时间局部性的数据，即最近被访问的数据可能在将来一段时间内仍然会被访问。LRU算法相对较简单，容易实现，适用于大多数场景。但是，当存在”热点数据”（被频繁访问的数据）时，LRU算法可能无法有效地保证缓存的命中率。
LFU算法适用于访问具有访问频次局部性的数据，即访问频次高的数据很可能在将来一段时间内仍然会被频繁访问。LFU算法需要维护每个对象的访问频次计数，相对于LRU算法来说更加复杂。LFU算法在面对热点数据的场景下表现较好，但在某些场景下可能存在”频次突变”的问题，即频次高的数据突然不再被访问，但因为频次计数较高而长时间无法被淘汰。

在实际应用中，我们可以根据具体的场景和需求选择合适的缓存淘汰策略，或者结合两种算法进行混合使用，以达到更好的缓存性能。

146. LRU 缓存中等

下面用javascript代码演示一下LRU缓存：

// 最近最少使用
// 定义LRU缓存类
class LRUCache {
    constructor(capacity) {
        this.capacity = capacity;
        this.cache = new Map();
    }

    get (key) {
        if (this.cache.has(key)) {
            const value = this.cache.get(key);
            this.cache.delete(key); // 删除key在Map中的位置
            this.cache.set(key, value); // 重新设置key对在Map的最后（最近使用,也就是设置为最新的）
            return value;
        }
        return -1;
    }

    put (key, value) {
        if (this.cache.has(key)) {
            this.cache.delete(key); // 删除key在Map中的位置
        } else if (this.cache.size >= this.capacity) {
            // 缓存容量不够了
            const oldestKey = this.cache.keys().next().value;
            this.cache.delete(oldestKey); // 删除最旧的key
        }
        this.cache.set(key, value);
    }
}

// 示例用法
const cache = new LRUCache(2); // 创建容量为2的LRU缓存

cache.put(1, 1);
cache.put(2, 2);
console.log(cache.get(1)); // 输出 1

cache.put(3, 3);
console.log(cache.get(2)); // 输出 -1
console.log(cache.get(3)); // 输出 3

cache.put(4, 4);
console.log(cache.get(1)); // 输出 -1
console.log(cache.get(3)); // 输出 3
console.log(cache.get(4)); // 输出 4

这段代码的技巧就在于使用了ES6的Map和迭代器的功能。Map 的遍历顺序就是插入顺序，使用Map.prototype.keys()：返回键名的遍历器。然后我们使用迭代器的 next() 方法获取数据结构的第一个成员,也就是最旧的。然后通过 value 属性来获取它的值。拿到最旧的key就可以删除它，最后重新设置key。（Map具有LRU特性）

460. LFU 缓存困难

javascript代码实现LFU：

// 最不经常使用
// 最不经常使用
class LFUCache {
    constructor(capacity) {
        this.capacity = capacity;
        this.cache = new Map();
        // 每个访问次数都对应存储一个 Set，里面包含 cache 里面的 Map 的 key
        this.frequency = new Map();
        this.minFrequency = 0;
    }

    get(key) {
        if (this.cache.has(key)) {
            const { value, freq } = this.cache.get(key);
            this.updateFrequency(key, freq); // 更新访问次数
            return value;
        }
        return -1;
    }

    put(key, value) {
        if (this.capacity === 0) return;

        if (this.cache.has(key)) {
            const { freq } = this.cache.get(key); // 获取当前元素旧的访问次数
            this.cache.set(key, { value, freq }); // 更新当前 key 对应的元素
            this.updateFrequency(key, freq); // 更新当前的访问次数
        } else {
            if (this.cache.size >= this.capacity) {
                // 获取访问次数最少的 key 组成的 Set
                const leastFreq = this.frequency.get(this.minFrequency);
                // 根据 Set 的 LRU 特性，返回最久未被访问的元素（如果访问次数最少的元素有多个，最久未被访问的元素会被删除）
                const deleteKey = leastFreq.keys().next().value;
                leastFreq.delete(deleteKey);
                this.cache.delete(deleteKey);
            }
            // 新添加元素时设置元素的访问次数为 1
            this.cache.set(key,{value,freq: 1});
            if (!this.frequency.has(1)) {
                // 新添加元素时，如果不存在访问次数为1对应的 Set,设置访问次数为 1 的 Set 为空
                this.frequency.set(1,new Set())
            }
            // 设置访问次数为1的 Set 中添加当前的key
            this.frequency.get(1).add(key);
            this.minFrequency = 1; // 新添加元素时最小访问次数更新为 1
        }
    }

    updateFrequency(key, freq) {
        // 从当前访问次数 freq 对应的 Set中，删除当前的 key
        const freqList = this.frequency.get(freq);
        freqList.delete(key);

        // 当前元素旧的访问次数等于最小访问次数并且当前访问次数 freq 组成的 Set 为空
        //（元素旧的访问次数等于最小访问次数，并且最小访问次数对应的 Set 为空的情况）
        // 如果某个元素它之前的访问次数为最小访问次数，而且没有其他元素与该元素的访问次数相同
        if (freq === this.minFrequency && freqList.size === 0) {
            this.minFrequency++;  // 更新最小访问次数
        }

        if (!this.frequency.has(freq + 1)) {
            this.frequency.set(freq + 1, new Set());
        }
        // 在更新后的访问次数对应的 Set 里面添加当前元素对应的 key
        this.frequency.get(freq + 1).add(key);
        this.cache.get(key).freq = freq + 1; // 更新当前元素的访问次数 + 1
    }
}

// 示例用法
const cache = new LFUCache(2); // 创建容量为2的LFU缓存

cache.put(1, 1);
cache.put(2, 2);
console.log(cache.get(1)); // 输出 1

cache.put(3, 3); // 删除 1
console.log(cache.get(2)); // 输出 2

console.log(cache.get(3)); // 输出 3

cache.put(4, 4); // 删除 2
console.log(cache.get(2)); // 输出 -1
console.log(cache.get(3)); // 输出 3
console.log(cache.get(4)); // 输出 4

同样这里使用了 Set 数据结构的 LRU 特性，在 frequency 对应的 Map 里面放在着每种访问次数对应的 key 组成的 Set。也就是说，访问次数相同的元素所对应的 key 存放在同一个 Set 中。这里要注意的就是新增元素时，设置访问次数为1的 Set为空,并且添加访问该元素的 key，同时设置最小访问次数为1。如果某个元素它之前的访问次数为最小访问次数，而且没有其他元素与该元素的访问次数相同，同时更新最小访问次数和该元素的访问次数，也就是该元素的访问次数 + 1，并且设置对应的 Set 数据结构为空，把当前访问的 key 添加进去。

go代码演示LRU缓存：

package main

import "fmt"

// LRUCache 最近最少使用
type LRUCache struct {
	size       int
	capacity   int
	cache      map[int]*DoubleLinkNode
	head, tail *DoubleLinkNode
}

// DoubleLinkNode 双向链表
type DoubleLinkNode struct {
	key, value int
	prev, next *DoubleLinkNode
}

// 初始化双向链表
func initDoubleLinkNode(key, value int) *DoubleLinkNode {
	return &DoubleLinkNode{
		key:   key,
		value: value,
	}
}

// Constructor 初始化LRU缓存
func Constructor(capacity int) LRUCache {
	l := LRUCache{
		capacity: capacity,
		cache:    map[int]*DoubleLinkNode{},
		head:     initDoubleLinkNode(0, 0),
		tail:     initDoubleLinkNode(0, 0),
	}
	l.head.next = l.tail
	l.tail.prev = l.head
	return l
}

func (l *LRUCache) Get(key int) int {
	if _, ok := l.cache[key]; !ok {
		return -1
	}
	node := l.cache[key]
	l.moveToHead(node)
	return node.value
}

func (l *LRUCache) Put(key, value int) {
	// 新增元素 放在链表头部
	if _, ok := l.cache[key]; !ok {
		node := initDoubleLinkNode(key, value)
		l.cache[key] = node
		l.addToHead(node)
		l.size++
		// 超出容量 删除最后一个元素
		if l.size > l.capacity {
			removed := l.removeTail()
			delete(l.cache, removed.key)
			l.size--
		}
	} else {
		// 更新元素 放在链表头部
		node := l.cache[key]
		node.value = value
		l.moveToHead(node)
	}
}

func (l *LRUCache) addToHead(node *DoubleLinkNode) {
	node.prev = l.head
	node.next = l.head.next
	l.head.next.prev = node
	l.head.next = node
}

func (l *LRUCache) removeNode(node *DoubleLinkNode) {
	node.prev.next = node.next
	node.next.prev = node.prev
}

func (l *LRUCache) moveToHead(node *DoubleLinkNode) {
	l.removeNode(node)
	l.addToHead(node)
}

func (l *LRUCache) removeTail() *DoubleLinkNode {
	node := l.tail.prev
	l.removeNode(node)
	return node
}

func main() {
	cache := Constructor(2)

	cache.Put(1, 1)
	cache.Put(2, 2)

	fmt.Println(cache.Get(1)) //删除2 输出 1

	cache.Put(3, 3)

	fmt.Println(cache.Get(2)) // 输出 -1

	cache.Put(4, 4) // 删除 1

	fmt.Println(cache.Get(1)) // 输出 -1
	fmt.Println(cache.Get(3)) // 输出 3
	fmt.Println(cache.Get(4)) // 输出 4
}

代码使用了双向链表 + map实现了LRU数据结构。代码中采用了首尾虚拟节点，访问到的节点移动到头部，超出容量时删除尾部的节点。

go代码实现LFU数据结构：

package main

import "fmt"

// LFUCache 最不经常使用
type LFUCache struct {
	size     int // 大小
	capacity int // 容量
	minFreq  int // 最小访问次数
	cache    map[int]*Node
	freq     map[int]*DLinkedList
}

type Node struct {
	key        int
	value      int
	freq       int // 访问次数
	prev, next *Node
}

// DLinkedList 访问次数为n的元素组成的双向链表（LRU特性）
type DLinkedList struct {
	head, tail *Node
}

func Constructor(capacity int) LFUCache {
	return LFUCache{
		capacity: capacity,
		cache:    make(map[int]*Node),
		freq:     make(map[int]*DLinkedList),
	}
}

func InitDLinkedList() *DLinkedList {
	head, tail := &Node{}, &Node{}
	head.next = tail
	tail.prev = head
	return &DLinkedList{
		head: head,
		tail: tail,
	}
}

func (d *DLinkedList) AddToHead(node *Node) {
	node.prev = d.head
	node.next = d.head.next

	d.head.next.prev = node
	d.head.next = node
}

func (d *DLinkedList) Remove(node *Node) {
	node.prev.next = node.next
	node.next.prev = node.prev

	node.prev = nil
	node.next = nil
}

func (d *DLinkedList) RemoveTail() *Node {
	// 空链表
	if d.IsEmpty() {
		return nil
	}
	last := d.tail.prev
	d.Remove(last)
	return last
}

func (d *DLinkedList) IsEmpty() bool {
	return d.head.next == d.tail
}

func (l *LFUCache) Get(key int) int {
	if node, ok := l.cache[key]; ok {
		l.addFreq(node)
		return node.value
	}
	return -1
}

func (l *LFUCache) Put(key, value int) {
	if l.capacity == 0 {
		return
	}
	// 更新
	if node, ok := l.cache[key]; ok {
		node.value = value
		l.addFreq(node)
	} else {
		// 超出容量，删除访问次数最低的对应数据链表的最旧的元素
		if l.size >= l.capacity {
			node := l.freq[l.minFreq].RemoveTail()
			delete(l.cache, node.key)
			l.size--
		}
		node := &Node{key: key, value: value, freq: 1}
		l.cache[key] = node // 储存新节点

		if l.freq[1] == nil {
			l.freq[1] = InitDLinkedList()
		}
		l.freq[1].AddToHead(node)

		l.minFreq = 1
		l.size++
	}
}

func (l *LFUCache) addFreq(node *Node) {
	n := node.freq
	l.freq[n].Remove(node)

	if l.minFreq == n && l.freq[n].IsEmpty() {
		l.minFreq++
		delete(l.freq, n)
	}

	node.freq++
	if l.freq[node.freq] == nil {
		l.freq[node.freq] = InitDLinkedList()
	}
	l.freq[node.freq].AddToHead(node)
}

func main() {
	cache := Constructor(2)
	cache.Put(1, 1)
	cache.Put(2, 2)

	fmt.Println(cache.Get(1)) // 1
	cache.Put(3, 3)           // 删除2

	fmt.Println(cache.Get(2)) // -1
	fmt.Println(cache.Get(3)) // 3

	cache.Put(4, 4) // key2被淘汰

	fmt.Println(cache.Get(1)) // -1
	fmt.Println(cache.Get(3)) // 3
	fmt.Println(cache.Get(4)) // 4
}

cache存储对应的数据节点，但是可能会出现节点访问次数相同的情况。对应访问次数相同的节点，我们把放在一个双向链表中，然后根据LRU最近最少使用，超过容量的时候删除双向链表尾部的节点。拿到节点的key,之后就能从map中删除储存的节点了。