March 1, 2014

How to translate virtual to physical addresses through /proc/pid/pagemap

I am currently working on a project where I need to translate virtual addresses of a user-level application to physical addresses in Linux. I implemented my own system call to do that, but had a hard time verifying the results I was getting.

Later I found out that in newer kernels there is a really nice virtual file in the /proc file system to get this information. I tried to cat it, doing cat /proc/self/pagemap, and got terrible binary output in my console.

So, it looks like working with this file is not such a pleasant experience. It's a binary file, with all that that implies. I found a couple of scripts that access this file and provide you with a nice text result, but unfortunately those were written in Perl and Ruby, and I needed to run it on a very minimalistic embedded system. I needed something that fits into a single binary.

Long story short, I decided to bite the bullet and write a tool in C. My contribution might be helpful for someone, that's why I'm sharing this code.

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Size in bytes of one pagemap entry (each entry is a single 64-bit word). */
#define PAGEMAP_ENTRY 8

/*
 * Value (0 or 1) of bit Y of the 64-bit word X.
 * Arguments and the whole expansion are parenthesised so the macro can be
 * used inside larger expressions; each argument is evaluated exactly once.
 * Y must be in the range 0..63.
 */
#define GET_BIT(X,Y) ((((uint64_t)(X)) >> (Y)) & 1)

/*
 * Page frame number field of pagemap entry X: the low 55 bits (bits 0-54),
 * which is what the mask 0x7FFFFFFFFFFFFF selects.
 */
#define GET_PFN(X) (((uint64_t)(X)) & UINT64_C(0x7FFFFFFFFFFFFF))

/*
 * Host byte-order probe: the first byte of an int holding 1 is 0 on a
 * big-endian machine and 1 on a little-endian one.
 * NOTE(review): identifiers containing "__" are reserved for the
 * implementation; the name is kept unchanged because it is visible at file
 * scope and may be referenced elsewhere.
 */
const int __endian_bit = 1;
#define is_bigendian() ( (*(const char*)&__endian_bit) == 0 )

/*
 * File-scope variables. They are zero-initialised at program start and keep
 * their original names and types so that every function in the file can
 * refer to them.
 */
int i;                        /* general-purpose loop index */
int c;                        /* holds a single byte value (or EOF) from a character read */
int pid;                      /* target process id; -1 stands for "self" */
int status;                   /* return status of a library call */
unsigned long virt_addr;      /* virtual address given on the command line */
uint64_t read_val;            /* a 64-bit pagemap entry */
uint64_t file_offset;         /* byte offset of an entry inside the pagemap file */
char path_buf [0x100] = {0};  /* path of the pagemap file; {0} instead of the
                                 non-standard empty initializer {} */
FILE * f;                     /* stream for the pagemap file */
char *end;                    /* end pointer filled in by the strto*() parsers */

/* Reads and prints the pagemap entry of virt_addr from the file path_buf. */
int read_pagemap(char * path_buf, unsigned long virt_addr);

/*
 * Command-line entry point.
 *
 * Usage: pagemap PID VIRTUAL_ADDRESS
 *   PID              positive decimal process id, or the literal "self"
 *   VIRTUAL_ADDRESS  virtual address in hexadecimal ("0x" prefix accepted)
 *
 * Stores the pagemap path in path_buf, the process id in pid (-1 for
 * "self") and the parsed address in virt_addr, then calls read_pagemap().
 *
 * Returns -1 when the arguments are invalid, otherwise whatever
 * read_pagemap() returns.
 */
int main(int argc, char ** argv){
   long parsed_pid;

   if(argc!=3){
      printf("Argument number is not correct!\n pagemap PID VIRTUAL_ADDRESS\n");
      return -1;
   }

   /* strcmp() stops at the terminator of argv[1]; comparing a fixed
      sizeof("self") bytes could read past the end of a shorter argument. */
   if(strcmp(argv[1], "self") == 0){
      snprintf(path_buf, sizeof(path_buf), "/proc/self/pagemap");
      pid = -1;
   }
   else{
      errno = 0;
      parsed_pid = strtol(argv[1], &end, 10);
      /* Reject empty input, trailing garbage, overflow and values that do
         not fit the int used to hold the pid. */
      if(end == argv[1] || *end != '\0' || errno == ERANGE
            || parsed_pid <= 0 || parsed_pid > INT_MAX){
         printf("PID must be a positive number or 'self'\n");
         return -1;
      }
      pid = (int)parsed_pid;
      snprintf(path_buf, sizeof(path_buf), "/proc/%d/pagemap", pid);
   }

   /* strtoul(), not strtol(): strtol() saturates at LONG_MAX, so where long
      is 32 bits every address at or above 0x80000000 was silently turned
      into 0x7fffffff. */
   errno = 0;
   virt_addr = strtoul(argv[2], &end, 16);
   if(end == argv[2] || *end != '\0' || errno == ERANGE){
      printf("VIRTUAL_ADDRESS must be a hexadecimal number that fits in an unsigned long\n");
      return -1;
   }

   /* Propagate the lookup result so that a failure is visible in the exit
      status instead of always reporting success. */
   return read_pagemap(path_buf, virt_addr);
}

/*
 * Print the pagemap entry that describes one virtual address.
 *
 * path_buf   path of the pagemap file to read (e.g. /proc/self/pagemap)
 * virt_addr  virtual address whose entry is wanted
 *
 * The entry for an address lives at (virt_addr / page size) * PAGEMAP_ENTRY
 * bytes into the file. The function prints the raw bytes, the assembled
 * 64-bit value, and then either the page frame number (when bit 63 is set)
 * or "Page not present"; bit 62 additionally reports a swapped page.
 *
 * Returns 0 when an entry was printed or the file ended before a complete
 * entry could be read, and -1 when the file cannot be opened, the page size
 * cannot be determined, or seeking/reading fails. The file is closed on
 * every path once it has been opened.
 */
int read_pagemap(char * path_buf, unsigned long virt_addr){
   FILE *f;
   long page_size;
   uint64_t file_offset;
   uint64_t read_val = 0;
   int big_endian = is_bigendian();
   int ret = -1;
   int i;

   printf("Big endian? %d\n", big_endian);
   f = fopen(path_buf, "rb");
   if(!f){
      printf("Error! Cannot open %s\n", path_buf);
      return -1;
   }

   /* sysconf(_SC_PAGESIZE) is the POSIX replacement for the legacy
      getpagesize(); a non-positive result must not be used as a divisor. */
   page_size = sysconf(_SC_PAGESIZE);
   if(page_size <= 0){
      perror("Failed to get the page size!");
      goto out;
   }

   /* Page index of the address, times the size of one pagemap entry. */
   file_offset = (uint64_t)(virt_addr / (unsigned long)page_size) * PAGEMAP_ENTRY;
   printf("Vaddr: 0x%lx, Page_size: %ld, Entry_size: %d\n", virt_addr, page_size, PAGEMAP_ENTRY);
   printf("Reading %s at 0x%llx\n", path_buf, (unsigned long long) file_offset);

   /* The offset is at most ULONG_MAX / page_size * 8, which fits in a long
      for any realistic page size, so the conversion does not truncate. */
   if(fseek(f, (long) file_offset, SEEK_SET)){
      perror("Failed to do fseek!");
      goto out;
   }

   /* Bytes are fetched one at a time with getc(), as in the original.
      The code treats the entry as a 64-bit word in host byte order:
      on a big-endian host the first byte is the most significant one, on a
      little-endian host byte i supplies bits 8*i .. 8*i+7. */
   for(i = 0; i < PAGEMAP_ENTRY; i++){
      int c = getc(f);
      if(c == EOF){
         if(ferror(f)){
            /* A failed read is an error, not an end-of-file condition. */
            printf("\n");
            perror("Failed to read pagemap entry!");
            goto out;
         }
         printf("\nReached end of the file\n");
         ret = 0;
         goto out;
      }
      if(big_endian){
         read_val = (read_val << 8) | (uint64_t) c;
      }
      else{
         read_val |= (uint64_t) c << (8 * i);
      }
      printf("[%d]0x%x ", i, c);
   }
   printf("\n");
   printf("Result: 0x%llx\n", (unsigned long long) read_val);

   if(GET_BIT(read_val, 63)){
      printf("PFN: 0x%llx\n", (unsigned long long) (GET_PFN(read_val)));
   }
   else{
      printf("Page not present\n");
   }
   if(GET_BIT(read_val, 62)){
      printf("Page swapped\n");
   }
   ret = 0;

out:
   fclose(f);
   return ret;
}
And now, how to use it. It's very simple. Of course, you need to compile it first. Then you need to find out what mappings your target process has. You can do that by reading the /proc/pid/maps file. Fortunately, that file is human-readable.
Once you know a valid virtual address, you can pass it to our tool to get the actual value from pagemap, including the physical frame number. Here is an example:

$ #let's find the virtual address of a page
$ cat /proc/self/maps 
00400000-0040b000 r-xp 00000000 08:02 1177367                            /bin/cat
0060a000-0060b000 r--p 0000a000 08:02 1177367                            /bin/cat
0060b000-0060c000 rw-p 0000b000 08:02 1177367                            /bin/cat
0223a000-0225b000 rw-p 00000000 00:00 0                                  [heap]
7fe7e15e1000-7fe7e1cc3000 r--p 00000000 08:02 1577390                    /usr/lib/locale/locale-archive
7fe7e1cc3000-7fe7e1e80000 r-xp 00000000 08:02 527324                     /lib/x86_64-linux-gnu/libc-2.17.so
7fe7e1e80000-7fe7e2080000 ---p 001bd000 08:02 527324                     /lib/x86_64-linux-gnu/libc-2.17.so
7fe7e2080000-7fe7e2084000 r--p 001bd000 08:02 527324                     /lib/x86_64-linux-gnu/libc-2.17.so
7fe7e2084000-7fe7e2086000 rw-p 001c1000 08:02 527324                     /lib/x86_64-linux-gnu/libc-2.17.so
7fe7e2086000-7fe7e208b000 rw-p 00000000 00:00 0 
7fe7e208b000-7fe7e20ae000 r-xp 00000000 08:02 527300                     /lib/x86_64-linux-gnu/ld-2.17.so
7fe7e228d000-7fe7e2290000 rw-p 00000000 00:00 0 
7fe7e22ab000-7fe7e22ad000 rw-p 00000000 00:00 0 
7fe7e22ad000-7fe7e22ae000 r--p 00022000 08:02 527300                     /lib/x86_64-linux-gnu/ld-2.17.so
7fe7e22ae000-7fe7e22b0000 rw-p 00023000 08:02 527300                     /lib/x86_64-linux-gnu/ld-2.17.so
7fffce6b6000-7fffce6d7000 rw-p 00000000 00:00 0                          [stack]
7fffce722000-7fffce724000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
$ #don't forget ASLR, normally only /bin/cat will remain the same
$ #so let's pick 0x00400000. Now we run our program. 
$ #First argument is pid, "self" is a legal option too, the second is virtual address
$ ./pagemap self 0x00400000
Reading /proc/self/pagemap at 0x2000
Result: 0xa60000000008c445

We got 0xa60000000008c445 as a result. Some of the bits show that the page is valid, along with the size of the page. You can read more in the Linux documentation: https://www.kernel.org/doc/Documentation/vm/pagemap.txt. Basically, the physical page number is 0x8c445.

Note that bits 56-60 have different meanings in different kernel versions. In most current versions they are forced to zero; in kernel version 3.11.0, however, they represent the page size.

UPDATE!
The original version of the code worked only on x86-64 machines. It used to read sizeof(unsigned int) bytes from the binary file, but it must read 64 bits no matter what word size the target machine has. There was another issue: on the OpenRISC simulator the fread() function always failed, which is why I changed the code to use getc() instead. That way it works on all the architectures I have tested so far.

I plan to expand the functionality of my program, for example include support of range lookups in the /proc/*/pagemap files. If someone is interested in some functionality like that, leave a comment below.

23 comments:

  1. Good job! I'm in need of something like this. I was not really sure about accessing filemap file and this has helped me a lot.
    Just my two cents:
    If you look at the kernel source files, the source file $(kernel_src_dir)/tools/vm/page-types.c specifies some usefull macros for accessing that file:
    PM_ENTRY_BYTES: the size of each pagemap entry
    PM_STATUS(nr): returns the three status flags (is file-page, page swapped and page present)
    PM_PSHIFT(x): returns the page shift
    PM_PFRAME(x): returns the page frame number (PFN)
    PM_PRESENT: mask for the present flag
    PM_SWAP: mask for the swapped flag

    Unfortunately these macros are not in the user-level headers so one must copy them into his own code in order to make use of them.

    Cheers

    ReplyDelete
  2. Thank you D-Man. Excellent Job. I am able to use it!

    ReplyDelete
  3. Hey D-man. First of all excellent work! I t worked for my regular 4kB pages. However when I try to find my huge page data's physical address it does not find it. Any ideas??

    Thanks!

    ReplyDelete
    Replies
    1. Thank you. Sorry but I didn't try to play with huge pages yet. Did you find something useful yourself?

      Delete
  4. This comment has been removed by the author.

    ReplyDelete
  5. Thanks you very much .
    But now I got some problems.
    First I think ,
    printf("Result: 0x%llx\n", (unsigned long long) read_val); should be printf("Result : 0x%llx\n", (unsigned long long) (read_val+virt_addr%getpagesize()));
    Second ,do you know how to get some info of virt_to_pfn,pfn_to_page,page_to_phy in user space?
    Third , how can I get virtual address from logical address?
    Any tip will be appreciated.

    ReplyDelete
  6. am Emerson here (yemerson1976@gmail.com). Thanks for sharing this code. It works well for code segment page but not for stack.
    Let me explain.

    Below is my process map which is always running (waiting in a while(1) loop).

    # cat /proc/3346/maps
    00010000-00011000 r-xp 00000000 00:15 6959007 /tmp/test_code
    00020000-00021000 rwxp 00000000 00:15 6959007 /tmp/test_code
    b6e1e000-b6ef6000 r-xp 00000000 fe:01 184 /lib/libc-2.18-2013.10.so
    b6ef6000-b6f06000 ---p 000d8000 fe:01 184 /lib/libc-2.18-2013.10.so
    b6f06000-b6f08000 r-xp 000d8000 fe:01 184 /lib/libc-2.18-2013.10.so
    b6f08000-b6f09000 rwxp 000da000 fe:01 184 /lib/libc-2.18-2013.10.so
    b6f09000-b6f0c000 rwxp 00000000 00:00 0
    b6f0c000-b6f23000 r-xp 00000000 fe:01 167 /lib/ld-2.18-2013.10.so
    b6f2f000-b6f31000 rwxp 00000000 00:00 0
    b6f31000-b6f32000 r-xp 00000000 00:00 0 [sigpage]
    b6f32000-b6f33000 r-xp 00016000 fe:01 167 /lib/ld-2.18-2013.10.so
    b6f33000-b6f34000 rwxp 00017000 fe:01 167 /lib/ld-2.18-2013.10.so
    be962000-be983000 rw-p 00000000 00:00 0 [stack]
    ffff0000-ffff1000 r-xp 00000000 00:00 0 [vectors]

    # ./get_pfn 3346 0x10000 // code page is working.
    Big endian? 0
    Vaddr: 0x10000, Page_size: 4096, Entry_size: 8
    Reading /proc/3346/pagemap at 0x80
    [0]0xfb [1]0x6a [2]0x3 [3]0x0 [4]0x0 [5]0x0 [6]0x0 [7]0xa6
    Result: 0xa600000000036afb
    PFN: 0x36afb


    # ./get_pfn 3346 0xbe962000 // stack page is not working
    Big endian? 0
    Vaddr: 0x7fffffff, Page_size: 4096, Entry_size: 8
    Reading /proc/3346/pagemap at 0x3ffff8
    [0]0x0 [1]0x0 [2]0x0 [3]0x0 [4]0x0 [5]0x0 [6]0x0 [7]0x6
    Result: 0x600000000000000
    Page not present

    Kindly let me know if I miss anything.

    Thanks much,
    Emerson

    ReplyDelete
    Replies
    1. That's interesting that stack pages do not work. Looks like the kernel for some reason gives no information about stack pages.

      Delete
    2. It is probably just because the stack page is not resident in RAM at the moment. Try comparing it to the pmap output to see if RSS shows that region

      Delete
  7. Hiii,Thanks for the code.
    But for me its showing following error: I would be grateful if can any one help me out:
    ./pageamap self 0x00400000
    Big endian? 0
    Error! Cannot open /proc/self/pagemap


    My kernel is version is 3.13.0-73-generic. Is it a problem or something else..??

    ReplyDelete
    Replies
    1. Try to run it with sudo. Physical frame numbers is potentially dangerous information, the kernel does not want let user processes to mess with it.

      Delete
  8. Thanks for share the code.But it not work well on Android(arm linux).
    root@sp9832iea_4m_volte:/data/local/tmp # cat /proc/3507/maps
    b6400000-b6c00000 rw-p 00000000 00:00 0 [anon:libc_malloc]
    b6e54000-b6e5c000 rw-s 60b00000 00:0c 6804 /dev/sprd_jpg
    b6e5c000-b6e5d000 r-xp 00000000 b3:11 1285 /system/lib/libnetd_client.so
    b6e5d000-b6e5e000 ---p 00000000 00:00 0
    b6e5e000-b6e5f000 r--p 00001000 b3:11 1285 /system/lib/libnetd_client.so
    b6e5f000-b6e60000 rw-p 00002000 b3:11 1285 /system/lib/libnetd_client.so
    b6e60000-b6e61000 r--p 00000000 00:00 0
    b6e61000-b6e81000 r--s 00000000 00:0c 7924 /dev/__properties__
    b6e81000-b6ee7000 r-xp 00000000 b3:11 1176 /system/lib/libc.so
    b6ee7000-b6ee8000 ---p 00000000 00:00 0
    b6ee8000-b6eeb000 r--p 00066000 b3:11 1176 /system/lib/libc.so
    b6eeb000-b6eee000 rw-p 00069000 b3:11 1176 /system/lib/libc.so
    b6eee000-b6ef7000 rw-p 00000000 00:00 0
    b6ef7000-b6f0d000 r-xp 00000000 b3:11 1265 /system/lib/libm.so
    b6f0d000-b6f0e000 ---p 00000000 00:00 0
    b6f0e000-b6f0f000 r--p 00016000 b3:11 1265 /system/lib/libm.so
    b6f0f000-b6f10000 rw-p 00017000 b3:11 1265 /system/lib/libm.so
    b6f10000-b6f12000 r-xp 00000000 b3:11 1388 /system/lib/libstdc++.so
    b6f12000-b6f13000 r--p 00001000 b3:11 1388 /system/lib/libstdc++.so
    b6f13000-b6f14000 rw-p 00002000 b3:11 1388 /system/lib/libstdc++.so
    b6f14000-b6f18000 r-xp 00000000 b3:11 1261 /system/lib/liblog.so
    b6f18000-b6f19000 r--p 00003000 b3:11 1261 /system/lib/liblog.so
    b6f19000-b6f1a000 rw-p 00004000 b3:11 1261 /system/lib/liblog.so
    b6f1a000-b6f1b000 r--p 00000000 00:00 0 [anon:linker_alloc]
    b6f1b000-b6f1c000 rw-p 00000000 00:00 0 [anon:linker_alloc]
    b6f1c000-b6f1d000 r--p 00000000 00:00 0 [anon:linker_alloc]
    b6f1d000-b6f1e000 r--p 00000000 00:00 0
    b6f1e000-b6f20000 rw-p 00000000 00:00 0
    b6f20000-b6f21000 r-xp 00000000 00:00 0 [sigpage]
    b6f21000-b6f2f000 r-xp 00000000 b3:11 610 /system/bin/linker
    b6f2f000-b6f30000 r--p 0000d000 b3:11 610 /system/bin/linker
    b6f30000-b6f31000 rw-p 0000e000 b3:11 610 /system/bin/linker
    b6f31000-b6f32000 rw-p 00000000 00:00 0
    b6f32000-b6f34000 r-xp 00000000 b3:17 222055 /data/local/tmp/get_phy_addr
    b6f34000-b6f35000 r--p 00001000 b3:17 222055 /data/local/tmp/get_phy_addr
    b6f35000-b6f36000 rw-p 00000000 00:00 0
    be8fe000-be91f000 rw-p 00000000 00:00 0 [stack]
    ffff0000-ffff1000 r-xp 00000000 00:00 0 [vectors]




    root@sp9832iea_4m_volte:/data/local/tmp # ./tool 3507 b6e54000
    pid:3529
    Big endian? 0
    Vaddr: 0x7fffffff, Page_size: 4096, Entry_size: 8
    Reading /proc/3507/pagemap at 0x3ffff8
    [0]0x0 [1]0x0 [2]0x0 [3]0x0 [4]0x0 [5]0x0 [6]0x0 [7]0x6
    Result: 0x600000000000000
    Page not present
    root@sp9832iea_4m_volte:/data/local/tmp # ./tool 3507 b6e5c000
    pid:3531
    Big endian? 0
    Vaddr: 0x7fffffff, Page_size: 4096, Entry_size: 8
    Reading /proc/3507/pagemap at 0x3ffff8
    [0]0x0 [1]0x0 [2]0x0 [3]0x0 [4]0x0 [5]0x0 [6]0x0 [7]0x6
    Result: 0x600000000000000
    Page not present


    my email : Cheng_Cheng_C_C@outlook.com .Looking forward to your reply.

    ReplyDelete
    Replies
    1. This comment has been removed by the author.

      Delete
    2. I'm also running this on an arm processor and get the same results.

      Delete
  9. Excellent, code is working for allocated memory.
    Does anyone know how to get physical address of mmaped memory?

    I have linux driver which is allocating specific kind of memory (for DMA, uncached).
    It is simply file where mmap fn. is implemented like this:
    /*
     * mmap handler: allocates a buffer with dma_alloc_coherent() sized to the
     * requested VMA and maps it into the caller with remap_pfn_range().
     * Returns 0 on success, -ENOMEM when the allocation fails and -EAGAIN when
     * the remap fails.
     */
    int uma_mmap(struct file *filp, struct vm_area_struct *vma) {
    phys_addr_t physAddr;
    size_t size = vma->vm_end - vma->vm_start; /* length of the requested mapping */
    void * virtAddr;

    // NOTE(review): the original comment read "we are not using NULL instead of
    // device, because device has hot specified memory range", yet NULL is what
    // is passed as the device below. Presumably the intent is "we are using NULL
    // instead of the device because the device has not specified a memory
    // range" -- confirm.
    virtAddr = dma_alloc_coherent(NULL, size, &physAddr, GFP_KERNEL);

    if (!virtAddr) {
    pr_info(
    KERN_ERR"[uncached_mem_allocator] %s: can not allocate memory of size %zu, device:%p\n",
    __func__, size, mmap_dev_device.this_device);
    return -ENOMEM;
    }
    /* Request a non-cached mapping for the pages handed to user space. */
    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

    /* Map the buffer, starting at frame physAddr >> PAGE_SHIFT, over the whole VMA. */
    if (remap_pfn_range(vma, vma->vm_start, physAddr >> PAGE_SHIFT, size,
    vma->vm_page_prot) < 0) {
    printk(KERN_ERR"[uncached_mem_allocator] %s: Error remap failed\n",
    __func__);
    vma->vm_ops =NULL;
    /* Undo the allocation made above before reporting the failure. */
    dma_free_coherent(NULL, size, virtAddr, physAddr);
    return -EAGAIN;
    }

    return 0;
    }

    If I try your code I will get "Page not present".

    ReplyDelete
  10. Your blog are very interesting I am a regular reader of your blog. This is nice post and gives lots of information to us. Virtual Number

    ReplyDelete
  11. This comment has been removed by the author.

    ReplyDelete
  12. Thanks!
    I used your code to write it in python and study for my OS test (=

    goes something like:

    import resource
    f = open("proc/TheProcessId/pagemap", 'rb')
    virtual_addr = int(TheVirtualAddressInHexAsString, 16)
    file_offset = virtual_addr / resource.getpagesize() * 8
    f.seek(file_offset)
    for i in range (0,8):
    byte = f.read(1)
    print repr(byte)

    not sure if this gives the real answer needed but I did get some value that looked
    sensible + helped me understand the virtual address calculations (=

    ReplyDelete
  13. It worked for me on aarch64, except that you were missing to include the string.h header for memcpy. Thanks!

    rack# ./a.out 719 0x2bdfc000
    Big endian? 0
    Vaddr: 0x2bdfc000, Page_size: 16384, Entry_size: 8
    Reading /proc/719/pagemap at 0x57bf8
    [0]0x73 [1]0x25 [2]0x20 [3]0x0 [4]0x0 [5]0x0 [6]0x0 [7]0x81
    Result: 0x7400000000202873
    PFN: 0x202873

    ReplyDelete
  14. I don't get it... how to get the physical memory address from the page address??

    ReplyDelete
  15. My aim is to find the current process task struct

    ReplyDelete
  16. Great job for publishing such a beneficial web site. Your web log isn’t only useful but it is additionally really creative too. There tend to be not many people who can certainly write not so simple posts that artistically. Continue the nice writing
    Translation services near me

    ReplyDelete