
Published: 2013-03-13 10:56:58  Author: rapoo

CACHE & TLB (3)
Source code analysis

Taking the ARM926EJ-S as an example: the main code lives in arch/arm/mm/cache-arm926.S and arch/arm/mm/proc-arm926.S.

1. The cache and TLB definitions

#ifdef MULTI_CACHE

@ from arch/arm/mm/proc-macros.S: builds the per-CPU cache function table
@ (the .macro opener is restored here; the original excerpt began mid-macro)
.macro define_cache_functions name:req
ENTRY(\name\()_cache_fns)
    .long   \name\()_flush_icache_all
    .long   \name\()_flush_kern_cache_all
    .long   \name\()_flush_user_cache_all
    .long   \name\()_flush_user_cache_range
    .long   \name\()_coherent_kern_range
    .long   \name\()_coherent_user_range
    .long   \name\()_flush_kern_dcache_area
    .long   \name\()_dma_map_area
    .long   \name\()_dma_unmap_area
    .long   \name\()_dma_inv_range
    .long   \name\()_dma_clean_range
    .long   \name\()_dma_flush_range
    .size   \name\()_cache_fns, . - \name\()_cache_fns
.endm

/*
 * the cache line size of the I and D cache
 */
#define CACHE_DLINESIZE 32

/*
 *  MM Cache Management
 *  ===================
 *
 *  The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files
 *  implement these methods.
 *
 *  Start addresses are inclusive and end addresses are exclusive;
 *  start addresses should be rounded down, end addresses up.
 *
 *  See Documentation/cachetlb.txt for more information.
 *  Please note that the implementation of these, and the required
 *  effects are cache-type (VIVT/VIPT/PIPT) specific.
 *
 *  flush_icache_all()
 *
 *      Unconditionally clean and invalidate the entire icache.
 *      Currently only needed for cache-v6.S and cache-v7.S, see
 *      __flush_icache_all for the generic implementation.
 *
 *  flush_kern_all()
 *
 *      Unconditionally clean and invalidate the entire cache.
 *
 *  flush_user_all()
 *
 *      Clean and invalidate all user space cache entries
 *      before a change of page tables.
 *
 *  flush_user_range(start, end, flags)
 *
 *      Clean and invalidate a range of cache entries in the
 *      specified address space before a change of page tables.
 *      - start - user start address (inclusive, page aligned)
 *      - end   - user end address (exclusive, page aligned)
 *      - flags - vma->vm_flags field
 *
 *  coherent_kern_range(start, end)
 *
 *      Ensure coherency between the Icache and the Dcache in the
 *      region described by start, end.  If you have non-snooping
 *      Harvard caches, you need to implement this function.
 *      - start - virtual start address
 *      - end   - virtual end address
 *
 *  coherent_user_range(start, end)
 *
 *      Ensure coherency between the Icache and the Dcache in the
 *      region described by start, end.  If you have non-snooping
 *      Harvard caches, you need to implement this function.
 *      - start - virtual start address
 *      - end   - virtual end address
 *
 *  flush_kern_dcache_area(kaddr, size)
 *
 *      Ensure that the data held in page is written back.
 *      - kaddr - page address
 *      - size  - region size
 *
 *  DMA Cache Coherency
 *  ===================
 *
 *  dma_inv_range(start, end)
 *
 *      Invalidate (discard) the specified virtual address range.
 *      May not write back any entries.  If 'start' or 'end'
 *      are not cache line aligned, those lines must be written
 *      back.
 *      - start - virtual start address
 *      - end   - virtual end address
 *
 *  dma_clean_range(start, end)
 *
 *      Clean (write back) the specified virtual address range.
 *      - start - virtual start address
 *      - end   - virtual end address
 *
 *  dma_flush_range(start, end)
 *
 *      Clean and invalidate the specified virtual address range.
 *      - start - virtual start address
 *      - end   - virtual end address
 */

struct cpu_cache_fns {
    void (*flush_icache_all)(void);
    void (*flush_kern_all)(void);
    void (*flush_user_all)(void);
    void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
    void (*coherent_kern_range)(unsigned long, unsigned long);
    void (*coherent_user_range)(unsigned long, unsigned long);
    void (*flush_kern_dcache_area)(void *, size_t);
    void (*dma_map_area)(const void *, size_t, int);
    void (*dma_unmap_area)(const void *, size_t, int);
    void (*dma_inv_range)(const void *, const void *);
    void (*dma_clean_range)(const void *, const void *);
    void (*dma_flush_range)(const void *, const void *);
};

extern struct cpu_cache_fns cpu_cache;

#define __cpuc_flush_icache_all     cpu_cache.flush_icache_all
#define __cpuc_flush_kern_all       cpu_cache.flush_kern_all
#define __cpuc_flush_user_all       cpu_cache.flush_user_all
#define __cpuc_flush_user_range     cpu_cache.flush_user_range
#define __cpuc_coherent_kern_range  cpu_cache.coherent_kern_range
#define __cpuc_coherent_user_range  cpu_cache.coherent_user_range
#define __cpuc_flush_dcache_area    cpu_cache.flush_kern_dcache_area

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */

#define dmac_map_area     cpu_cache.dma_map_area
#define dmac_unmap_area   cpu_cache.dma_unmap_area
#define dmac_inv_range    cpu_cache.dma_inv_range
#define dmac_clean_range  cpu_cache.dma_clean_range
#define dmac_flush_range  cpu_cache.dma_flush_range
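These dmac_* hooks are what the streaming DMA-mapping code uses underneath. As a rough illustration of the direction rules (a sketch only; the helper name and the simplified policy are mine, not the kernel's actual code):

/* Sketch: which dmac_* op each DMA direction needs. DMA_TO_DEVICE and
 * DMA_FROM_DEVICE are the kernel's enum dma_data_direction values. */
static void dma_cache_maint_sketch(const void *start, size_t size, int dir)
{
    const char *p = start, *end = p + size;

    if (dir == DMA_TO_DEVICE)
        dmac_clean_range(p, end);   /* push dirty lines to RAM before the device reads */
    else
        dmac_inv_range(p, end);     /* discard stale lines before the CPU reads DMA data */
    /* DMA_BIDIRECTIONAL would need dmac_flush_range (clean + invalidate) */
}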

#ifdef MULTI_TLB
struct cpu_tlb_fns cpu_tlb __read_mostly;
#endif

struct cpu_tlb_fns {
    void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
    void (*flush_kern_range)(unsigned long, unsigned long);
    unsigned long tlb_flags;
};
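For the ARM926EJ-S the instance of this table is v4wbi_tlb_fns, referenced later in __arm926_proc_info. The real object is laid out in assembly in arch/arm/mm/tlb-v4wbi.S; the following is only a hypothetical C rendering to show how the fields line up:

/* Hypothetical C view of v4wbi_tlb_fns - the kernel builds it in asm. */
extern void v4wbi_flush_user_tlb_range(unsigned long, unsigned long,
                                       struct vm_area_struct *);
extern void v4wbi_flush_kern_tlb_range(unsigned long, unsigned long);

static const struct cpu_tlb_fns v4wbi_tlb_fns_sketch = {
    .flush_user_range = v4wbi_flush_user_tlb_range,
    .flush_kern_range = v4wbi_flush_kern_tlb_range,
    .tlb_flags        = v4wbi_tlb_flags,  /* TLB_WB | TLB_DCLEAN | ... */
};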

/*
 *  TLB Management
 *  ==============
 *
 *  The arch/arm/mm/tlb-*.S files implement these methods.
 *
 *  The TLB specific code is expected to perform whatever tests it
 *  needs to determine if it should invalidate the TLB for each
 *  call.  Start addresses are inclusive and end addresses are
 *  exclusive; it is safe to round these addresses down.
 *
 *  flush_tlb_all()
 *
 *      Invalidate the entire TLB.
 *
 *  flush_tlb_mm(mm)
 *
 *      Invalidate all TLB entries in a particular address
 *      space.
 *      - mm    - mm_struct describing address space
 *
 *  flush_tlb_range(mm,start,end)
 *
 *      Invalidate a range of TLB entries in the specified
 *      address space.
 *      - mm    - mm_struct describing address space
 *      - start - start address (may not be aligned)
 *      - end   - end address (exclusive, may not be aligned)
 *
 *  flush_tlb_page(vaddr,vma)
 *
 *      Invalidate the specified page in the specified address range.
 *      - vaddr - virtual address (may not be aligned)
 *      - vma   - vma_struct describing address range
 *
 *  flush_kern_tlb_page(kaddr)
 *
 *      Invalidate the TLB entry for the specified page.  The address
 *      will be in the kernel's virtual memory space.  Current uses
 *      only require the D-TLB to be invalidated.
 *      - kaddr - Kernel virtual memory address
 */

#ifdef MULTI_TLB
#define __cpu_flush_user_tlb_range  cpu_tlb.flush_user_range
#define __cpu_flush_kern_tlb_range  cpu_tlb.flush_kern_range
#define __cpu_tlb_flags             cpu_tlb.tlb_flags

/*
 * Convert calls to our calling convention.
 */
#define local_flush_tlb_range(vma,start,end)  __cpu_flush_user_tlb_range(start,end,vma)
#define local_flush_tlb_kernel_range(s,e)     __cpu_flush_kern_tlb_range(s,e)
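The whole MULTI_CACHE/MULTI_TLB mechanism is just run-time dispatch through a table of function pointers, filled in once at boot. A minimal stand-alone sketch of the pattern (every name here is invented for illustration, none are kernel symbols):

#include <stdio.h>

struct cache_fns {
    void (*flush_kern_all)(void);
};

static void arm926_flush_kern_all_demo(void)
{
    puts("arm926: clean+invalidate the whole cache");
}

/* the global table, filled in once at "boot" */
static struct cache_fns cpu_cache_demo;

/* generic code always goes through the macro alias */
#define __cpuc_flush_kern_all_demo cpu_cache_demo.flush_kern_all

int main(void)
{
    /* what setup_processor() effectively does: copy the matched table */
    struct cache_fns arm926_fns = { arm926_flush_kern_all_demo };
    cpu_cache_demo = arm926_fns;

    /* generic code then calls through the alias, unaware of the CPU type */
    __cpuc_flush_kern_all_demo();
    return 0;
}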

2. Passing the architecture-specific pointers

In setup_processor(), the architecture-specific function tables are located and copied via lookup_processor_type().

static void __init setup_processor(void)
{
    struct proc_info_list *list;

    /*
     * locate processor in the list of supported processor
     * types.  The linker builds this table for us from the
     * entries in arch/arm/mm/proc-*.S
     */
    list = lookup_processor_type(read_cpuid_id());

#ifdef MULTI_CPU
    processor = *list->proc;
#endif
#ifdef MULTI_TLB
    cpu_tlb = *list->tlb;
#endif
#ifdef MULTI_USER
    cpu_user = *list->user;
#endif
#ifdef MULTI_CACHE
    cpu_cache = *list->cache;
#endif
    ...
}

For example, the ARM926EJ-S entry:

    .type   __arm926_proc_info, #object
__arm926_proc_info:
    .long   0x41069260          @ ARM926EJ-S (v5TEJ)
    .long   0xff0ffff0
    .long   PMD_TYPE_SECT | \
            PMD_SECT_BUFFERABLE | \
            PMD_SECT_CACHEABLE | \
            PMD_BIT4 | \
            PMD_SECT_AP_WRITE | \
            PMD_SECT_AP_READ
    .long   PMD_TYPE_SECT | \
            PMD_BIT4 | \
            PMD_SECT_AP_WRITE | \
            PMD_SECT_AP_READ
    b       __arm926_setup
    .long   cpu_arch_name
    .long   cpu_elf_name
    .long   HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
    .long   cpu_arm926_name
    .long   arm926_processor_functions
    .long   v4wbi_tlb_fns
    .long   v4wb_user_fns
    .long   arm926_cache_fns
    .size   __arm926_proc_info, . - __arm926_proc_info
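Each .long above fills one field of struct proc_info_list. The layout, slightly abridged from arch/arm/include/asm/procinfo.h, with the ARM926 values noted alongside:

struct proc_info_list {
    unsigned int         cpu_val;             /* 0x41069260 */
    unsigned int         cpu_mask;            /* 0xff0ffff0 */
    unsigned long        __cpu_mm_mmu_flags;  /* PMD_* flags, used by head.S */
    unsigned long        __cpu_io_mmu_flags;  /* PMD_* flags, used by head.S */
    unsigned long        __cpu_flush;         /* b __arm926_setup */
    const char           *arch_name;          /* cpu_arch_name */
    const char           *elf_name;           /* cpu_elf_name */
    unsigned int         elf_hwcap;           /* HWCAP_* bits */
    const char           *cpu_name;           /* cpu_arm926_name */
    struct processor     *proc;               /* arm926_processor_functions */
    struct cpu_tlb_fns   *tlb;                /* v4wbi_tlb_fns */
    struct cpu_user_fns  *user;               /* v4wb_user_fns */
    struct cpu_cache_fns *cache;              /* arm926_cache_fns */
};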

3. cpu_tlb & cpu_cache

Here we focus on cpu_tlb and cpu_cache.

a) cpu_cache

For example, unmap_single ends up calling __cpuc_flush_dcache_area; no TLB flush is needed along this path.

__cpuc_flush_dcache_area(ptr, size) resolves to arm926_flush_kern_dcache_area.

PS:

// Marking a D-cache line invalid makes main memory authoritative: the line no longer hits, so the next access fetches its data from main memory.

lr is the link register holding the return address, so the routine returns with mov pc, lr.
r0 carries the ptr argument, r1 carries size.

The source code:

ENTRY(arm926_flush_kern_dcache_area)
    add     r0, r0, r1              @ wait - see below; r1 becomes the end address
1:  mcr     p15, 0, r0, c7, c14, 1  @ clean+invalidate D entry
    add     r0, r0, #CACHE_DLINESIZE
    cmp     r0, r1
    blo     1b
    mov     r0, #0
    mcr     p15, 0, r0, c7, c5, 0   @ invalidate I cache
    mcr     p15, 0, r0, c7, c10, 4  @ drain WB
    mov     pc, lr                  @ return to caller (e.g. unmap_single)
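Read as C, the routine does the following (an illustrative sketch; the *_line/*_buffer helpers stand in for the mcr instructions above and are not real kernel functions):

extern void clean_and_invalidate_dcache_line(unsigned long addr);
extern void invalidate_icache(void);
extern void drain_write_buffer(void);

void flush_kern_dcache_area_sketch(void *ptr, size_t size)
{
    unsigned long addr = (unsigned long)ptr;
    unsigned long end  = addr + size;               /* add r1, r0, r1 */

    for (; addr < end; addr += CACHE_DLINESIZE)
        clean_and_invalidate_dcache_line(addr);     /* mcr p15, 0, r0, c7, c14, 1 */
    invalidate_icache();                            /* mcr p15, 0, r0, c7, c5, 0 */
    drain_write_buffer();                           /* mcr p15, 0, r0, c7, c10, 4 */
}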

PS:

Invalidating a line discards its contents, so the next access misses and is refilled from physical memory. Cleaning a line (the write-back step) copies dirty data from the cache out to physical memory.

For example,

/*
 *  flush_kern_cache_all()
 *
 *  Clean and invalidate the entire cache.
 */
ENTRY(arm926_flush_kern_cache_all)
    mov     r2, #VM_EXEC
    mov     ip, #0
__flush_whole_cache:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
    mcr     p15, 0, ip, c7, c6, 0   @ invalidate D cache
#else
1:  mrc     p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
    bne     1b
#endif
    tst     r2, #VM_EXEC
    mcrne   p15, 0, ip, c7, c5, 0   @ invalidate I cache
    mcrne   p15, 0, ip, c7, c10, 4  @ drain WB
    mov     pc, lr

b) cpu_tlb

The TLB implementation used here is v4wbi; see the ARM926EJ-S documentation for the details.

for example,

#define local_flush_tlb_kernel_range(s,e)  __cpu_flush_kern_tlb_range(s,e)

static inline void ipi_flush_tlb_kernel_range(void *arg)
{
    struct tlb_args *ta = (struct tlb_args *)arg;

    local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
}

On entry to v4wbi_flush_kern_tlb_range:
lr = ipi_flush_tlb_kernel_range (return address)
r0 = ta->ta_start
r1 = ta->ta_end
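On SMP the kernel-range flush must reach every core, which is where the ipi helper above comes in. The wrapper in arch/arm/kernel/smp_tlb.c looks roughly like this in kernels of this era (quoted from memory, so treat the details as approximate):

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
    if (tlb_ops_need_broadcast()) {
        struct tlb_args ta;
        ta.ta_start = start;
        ta.ta_end = end;
        /* run ipi_flush_tlb_kernel_range on every CPU, then wait */
        on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
    } else
        local_flush_tlb_kernel_range(start, end);
}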

local_flush_tlb_kernel_page is just a special case of local_flush_tlb_kernel_range, with end = start + PAGE_SIZE.
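In other words (a trivial sketch, not the kernel's exact definition):

static inline void local_flush_tlb_kernel_page_sketch(unsigned long kaddr)
{
    kaddr &= PAGE_MASK;  /* page-align, matching the bic instructions below */
    local_flush_tlb_kernel_range(kaddr, kaddr + PAGE_SIZE);
}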

My take:

From the source code we can see that v4wbi_flush_kern_tlb_range follows the same loop structure as arm926_flush_kern_dcache_area, except that it invalidates the I-TLB and D-TLB entries instead of cache lines. What is more, before invalidating a kernel TLB range the write buffer is drained, so that pending writes reach memory and later accesses to the range do not lose data. This function is important at context switch and on SMP architectures.

The source code:

ENTRY(v4wbi_flush_kern_tlb_range)
    mov     r3, #0
    mcr     p15, 0, r3, c7, c10, 4  @ drain WB
    bic     r0, r0, #0x0ff          @ align start down to a
    bic     r0, r0, #0xf00          @ 4K page boundary
1:  mcr     p15, 0, r0, c8, c5, 1   @ invalidate I TLB entry
    mcr     p15, 0, r0, c8, c6, 1   @ invalidate D TLB entry
    add     r0, r0, #PAGE_SZ
    cmp     r0, r1
    blo     1b
    mov     pc, lr

4. When does the TLB need to be flushed?

My take:

1. At context_switch (flush_tlb).
2. On SMP, flush_tlb_kernel_range (broadcast to the other CPUs if needed).
3. dma_remap / __dma_free_remap.
4. unmap_area_sections, which is tied to !CONFIG_SMP (see below).

#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
/*
 * Section support is unsafe on SMP - If you iounmap and ioremap a region,
 * the other CPUs will not see this change until their next context switch.
 * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs
 * which requires the new ioremap'd region to be referenced, the CPU will
 * reference the _old_ region.
 *
 * Note that get_vm_area_caller() allocates a guard 4K page, so we need to
 * mask the size back to 1MB aligned or we will overflow in the loop below.
 */
static void unmap_area_sections(unsigned long virt, unsigned long size)

5. unmap_kernel_range

/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes vcache before
 * the unmapping and tlb after.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
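The body is short and shows exactly the ordering argued for in section 4: flush the cache while the mapping is still live, tear down the mapping, then flush the TLB. From mm/vmalloc.c of this era (approximately):

void unmap_kernel_range(unsigned long addr, unsigned long size)
{
    unsigned long end = addr + size;

    flush_cache_vunmap(addr, end);      /* data cache first, mapping still live */
    vunmap_page_range(addr, end);       /* tear down the page tables */
    flush_tlb_kernel_range(addr, end);  /* finally drop the stale TLB entries */
}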
