unstrip: Restoring Function Information to Stripped...

22
Paradyn Project Paradyn / Dyninst Week Madison, Wisconsin May 2-4, 2011 unstrip: Restoring Function Information to Stripped Binaries Using Dyninst Emily Jacobson and Nathan Rosenblum

Transcript of unstrip: Restoring Function Information to Stripped...

Page 1: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Paradyn Project

Paradyn / Dyninst Week

Madison, Wisconsin

May 2-4, 2011

unstrip: Restoring Function Information

to Stripped Binaries Using Dyninst

Emily Jacobson and Nathan Rosenblum

Page 2: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Binary Tools Need Symbol Tables

o Debugging Tools

o GDB, IDA Pro…

o Instrumentation Tools

o PIN, Dyninst,…

o Static Analysis Tools

o CodeSurfer/x86,…

o Security Analysis Tools

o IDA Pro,…

2 unstrip: Restoring Function Information to Stripped Binaries

Page 3: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

<targ8056f50>:

push %ebp

mov %esp,%ebp

sub $0x8,%esp

mov 0x8(%ebp),%eax

add $0xfffffff8,%esp

push %eax

call <targ80c3bd0>

push %eax

call <targ8057220>

mov %ebp,%esp

pop %ebp

3 unstrip: Restoring Function Information to Stripped Binaries

push %ebp

mov %esp,%ebp

sub $0x8,%esp

mov 0x8(%ebp),%eax

add $0xfffffff8,%esp

push %eax

call 80c3bd0

push %eax

call 8057220

mov %ebp,%esp

pop %ebp

unstrip

unstrip = stripped parsing

+

binary rewriting

Page 4: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

New Semantic Information

o Important semantic information:

program’s interaction with the operating system

(system calls)

o These calls are encapsulated in wrapper functions

Library fingerprinting: identify functions based on

patterns learned from exemplar libraries

4 unstrip: Restoring Function Information to Stripped Binaries

Page 5: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

<targ8056f50>:

push %ebp

mov %esp,%ebp

sub $0x8,%esp

mov 0x8(%ebp),%eax

add $0xfffffff8,%esp

push %eax

call <targ80c3bd0>

push %eax

call <targ8057220>

mov %ebp,%esp

pop %ebp

5 unstrip: Restoring Function Information to Stripped Binaries

push %ebp

mov %esp,%ebp

sub $0x8,%esp

mov 0x8(%ebp),%eax

add $0xfffffff8,%esp

push %eax

call 80c3bd0

push %eax

call 8057220

mov %ebp,%esp

pop %ebp

unstrip = stripped parsing

+

+

binary rewriting

library fingerprinting

call <getpid>

call <kill>

unstrip

Page 6: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

6

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048300

ret

mov %esi,%esi

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

Set up system call arguments

int $0x80

Invoke a system call

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048300

ret

Error check and

return

Page 7: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

7

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048300

ret

mov %esi,%esi

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048300

ret

<accept>:

cmpl $0x0,%gs:0xc

jne 80f669c

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

call *0x814e93c

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048460

ret

push %esi

call libc_enable_asynccancel

mov %eax,%esi

mov %ebx,%edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x8(%esp),%ecx

call *0x8181578

mov %edx, %ebx

xchg %eax,%esi

call libc_disable_asynccancel

mov %esi,%eax

pop %esi

cmp $0xffffff83,%eax

jae syscall_error

ret

glibc 2.5 on RHEL with GCC 3.4.4

glibc 2.2.4 on RHEL

The same function

can be realized in

a variety of ways

in the binary

<accept>:

cmpl $0x0,%gs:0xc

jne 80f669c

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048460

ret

push %esi

call libc_enable_asynccancel

mov %eax,%esi

mov %ebx,%edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x8(%esp),%ecx

int $0x80

mov %edx, %ebx

xchg %eax,%esi

call libc_disable_asynccancel

mov %esi,%eax

pop %esi

cmp $0xffffff83,%eax

jae syscall_error

ret

glibc 2.5 on RHEL with GCC 4.1.2

Page 8: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Semantic Descriptors

o Instead, we’ll take a semantic approach

o Record information that is likely to be invariant

across multiple versions of the function

8 unstrip: Restoring Function Information to Stripped Binaries

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

mov %edx, %ebx

cmp $0xffffff83,%eax

jae 8048300

ret

mov %esi,%esi

int $0x80

mov $0x66,%eax

mov $0x5,%ebx

{<socketcall, 5>}

Page 9: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Building Semantic Descriptors

9 unstrip: Restoring Function Information to Stripped Binaries

We parse an input binary, locate

system calls and wrapper function

calls, and employ dataflow analysis.

binary

reboot:

push %ebp

mov %esp,%ebp

sub $0x10,%esp

push %edi

push %ebx

mov 0x8(%ebp),%edx

mov $0xfee1dead,%edi

mov $0x28121969,%ecx

push %ebx

mov %edi,%ebx

mov $0x58,%eax

int $0x80

SYSTEM CALL

0x58 0x28121969

EAX EBX ECX

%edi

0xfee1dead

{<reboot, 0xfee1dead, 0x28121969>}

EAX

Page 10: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

unstrip

Building a Descriptor Database

10 unstrip: Restoring Function Information to Stripped Binaries

Descriptor

Database

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

Locate wrapper functions

Build semantic

descriptors

{<socketcall, 5>}: accept

{<socketcall, 4>}: listen

{<getpid>}: getpid

glibc

reference

library

Page 11: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

glibc

reference

library glibc

reference

library glibc

reference

library glibc

reference

library

unstrip

Building a Descriptor Database

11 unstrip: Restoring Function Information to Stripped Binaries

Descriptor

Database Build semantic

descriptors

Locate wrapper functions

{<socketcall, 5>}: accept

{<socketcall, 4>}: listen

{<getpid>}: getpid

{<socketcall, 5>}: accept

{<socketcall, 4>}: listen

{<getpid>}: getpid

{<socketcall, 5>}: accept

{<socketcall, 4>}: listen

{<getpid>}: getpid

{<socketcall, 5>}: accept

{<socketcall, 4>}: listen

{<getpid>}: getpid

1

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

1

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

1

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

1

<accept>:

mov %ebx, %edx

mov $0x66,%eax

mov $0x5,%ebx

lea 0x4(%esp),%ecx

int $0x80

Page 12: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

unstrip

Identifying Functions in a Stripped Binary

12 unstrip: Restoring Function Information to Stripped Binaries

stripped

binary

unstripped

binary

Descriptor

Database

For each wrapper function

{

1. Build the semantic

descriptor.

2. Search the database

for a match (two stages).

3. Add label to symbol

table.

}

Page 13: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation

o To evaluate across three dimensions of variation,

we constructed three data sets:

o compiler version

o library version

o distribution vendor

o In each set, compile statically-linked binaries, build

a descriptor database (DDB), compare unstrip to IDA Pro’s FLIRT

o Evaluation measure is accuracy

13 unstrip: Restoring Function Information to Stripped Binaries

Page 14: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Compiler Version Study

0

0.25

0.5

0.75

1

3.4.4 4.0.2 4.1.2 4.2.1

accuracy

GCC 3.4.4 Patterns Predicting Each Library

unstrip

IDA Pro

14 unstrip: Restoring Function Information to Stripped Binaries

Page 15: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Library Version Study

0

0.25

0.5

0.75

1

2.2.4 2.3.2 2.3.4 2.5 2.11.1

accuracy

glibc 2.2.4 Patterns Predicting Each Library

unstrip

IDA Pro

15 unstrip: Restoring Function Information to Stripped Binaries

Page 16: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Distribution Study

0

0.25

0.5

0.75

1

Fedora Mandriva OpenSuse Ubuntu

accuracy

Fedora Patterns Predicting Each Library

unstrip

IDA Pro

16 unstrip: Restoring Function Information to Stripped Binaries

Page 17: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

17 unstrip: Restoring Function Information to Stripped Binaries

For full details, tech report available online at: ftp://ftp.cs.wisc.edu/paradyn/papers/Jacobson11Unstrip.pdf

unstrip is available at: http://www.paradyn.org/html/tools/unstrip.html

Come see the unstrip demo today at

2:00 or 2:30 (in 1260 WID/MIR)

Page 18: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

18 unstrip: Restoring Function Information to Stripped Binaries

Extra Slides

o Some additional results

Page 19: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Distribution Study

0

0.25

0.5

0.75

1

Fedora Mandriva OpenSuse Ubuntu

accuracy

Mandriva Patterns Predicting Each Library

unstrip

IDA Pro

19 unstrip: Restoring Function Information to Stripped Binaries

Page 20: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Toolchain Study

(one predicts the rest)

0

0.25

0.5

0.75

1

3.4.4 4.0.2 4.1.2 4.2.1

Accuracy

GNU C Compiler Version

unstrip

IDA Pro

20 unstrip: Restoring Function Information to Stripped Binaries

Page 21: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Library Version Study

(one predicts the rest)

0

0.25

0.5

0.75

1

2.2.4 2.3.2 2.3.4 2.5 2.11.1

Accuracy

glibc version

unstrip

IDA Pro

21 unstrip: Restoring Function Information to Stripped Binaries

Page 22: unstrip: Restoring Function Information to Stripped ...pages.cs.wisc.edu/~jacobson/pubs/jacobson-unstrip-slides.pdf · Building Semantic Descriptors unstrip: Restoring Function Information

Evaluation Results: Distribution Study

(one predicts the rest)

0

0.25

0.5

0.75

1

Fedora Mandriva OpenSuse Ubuntu

Accuracy

Distribution Vendor

unstrip

IDA Pro

22 unstrip: Restoring Function Information to Stripped Binaries