angr’s symbolic execution engine excels at finding security vulnerabilities by exploring program paths and identifying exploitable conditions. This guide covers practical vulnerability discovery workflows.
Documentation Index
Fetch the complete documentation index at: https://mintlify.com/angr/angr/llms.txt
Use this file to discover all available pages before exploring further.
Overview
angr can automatically discover:
- Buffer overflows (stack and heap)
- Format string vulnerabilities
- Integer overflows
- Use-after-free conditions
- Null pointer dereferences
- Exploitable paths to dangerous functions
Basic Vulnerability Discovery Workflow
Identify dangerous functions
Look for potentially vulnerable function calls:
import angr
# Load the target binary on its own (no shared libraries) and recover its CFG.
project = angr.Project("./vulnerable_binary", auto_load_libs=False)
cfg = project.analyses.CFGFast()

# Report each risky libc routine that the CFG's knowledge base knows about.
dangerous_functions = ['strcpy', 'gets', 'sprintf', 'scanf']
for func_name in dangerous_functions:
    if func_name not in cfg.kb.functions:
        continue
    func_addr = cfg.kb.functions[func_name].addr
    print(f"Found {func_name} at:", hex(func_addr))
Set up symbolic execution to reach the target
Create symbolic input and explore to the dangerous function:
import claripy
# Symbolic stdin: input_size bytes, expressed in bits for the bitvector.
input_size = 256
input_bits = 8 * input_size
symbolic_input = claripy.BVS('input', input_bits)

# Start at the program entry point with the symbolic buffer wired to stdin.
state = project.factory.entry_state(stdin=symbolic_input)
simgr = project.factory.simulation_manager(state)
Check for exploitable conditions
Explore paths and check if we can control dangerous operations:
def check_exploitable(state, probe_size=8):
    """Return True when the state is at strcpy and its source is input-controlled.

    Args:
        state: The angr state handed in by the simulation manager.
        probe_size: Number of bytes read from the source buffer when testing
            whether its contents are symbolic.

    Returns:
        True if the state sits at ``strcpy_call_addr`` and either the source
        pointer or the first ``probe_size`` bytes it points to are symbolic;
        False otherwise.

    Note:
        ``strcpy_call_addr`` is not defined in this snippet. Set it to the
        address of the strcpy call you want to reach before exploring.
    """
    # Only states that reached the strcpy call are of interest.
    if state.addr != strcpy_call_addr:
        return False

    src_ptr = state.regs.rsi  # x86-64 second argument

    # A symbolic pointer means the input decides where strcpy reads from.
    if state.solver.symbolic(src_ptr):
        return True

    # The usual case is a concrete pointer to a buffer that holds symbolic
    # bytes, so the buffer contents must be inspected as well.
    src_data = state.memory.load(src_ptr, probe_size)
    return state.solver.symbolic(src_data)


simgr.explore(find=check_exploitable)
Finding Buffer Overflows
Stack Buffer Overflow Detection
Here’s a complete example based on the strcpy_find challenge:
import angr
import claripy
project = angr.Project("./vulnerable_binary", auto_load_libs=False)
cfg = project.analyses.CFGFast()

# Look up the loader's strcpy symbol and keep its rebased address.
strcpy_symbol = project.loader.find_symbol('strcpy')
strcpy_plt = strcpy_symbol.rebased_addr
class BufferOverflowFinder:
    """Step a binary symbolically and record states that reach strcpy
    with an input-controlled source.

    Uses the module-level ``strcpy_plt`` address. This assumes execution
    arrives at that address whenever strcpy is invoked (verify for your
    binary and loader settings).
    """

    # Number of source-buffer bytes inspected for symbolic content.
    PROBE_SIZE = 8

    def __init__(self, project, cfg):
        """Store the angr project and its CFG; start with no findings."""
        self.project = project
        self.cfg = cfg
        self.vulnerable_paths = []

    def is_vulnerable_strcpy(self, state):
        """Check if strcpy call is exploitable.

        Returns True, and records a copy of ``state`` in
        ``self.vulnerable_paths``, when the state has arrived at strcpy and
        the source argument (rsi) is a symbolic pointer or points to
        symbolic bytes on a satisfiable path.
        """
        # Compare concrete addresses. The value at [rsp] at this point is not
        # the callee, and using a claripy comparison directly as an `if`
        # condition is not a valid truth test.
        if state.addr != strcpy_plt:
            return False

        src = state.regs.rsi  # x86-64 second argument: source pointer
        if not state.solver.symbolic(src):
            # Concrete pointer: the buffer contents decide controllability.
            src_data = state.memory.load(src, self.PROBE_SIZE)
            if not state.solver.symbolic(src_data):
                return False

        # LAZY_SOLVES defers feasibility checks, so confirm the path is real.
        if not state.solver.satisfiable():
            return False

        self.vulnerable_paths.append(state.copy())
        return True

    def find_overflows(self, max_steps=None):
        """Run symbolic execution to find buffer overflows.

        Args:
            max_steps: Optional cap on the number of simulation-manager
                steps; ``None`` keeps stepping until no active states remain.

        Returns:
            The list of recorded vulnerable states.
        """
        # 64 symbolic bytes delivered on stdin.
        password = claripy.BVS('password', 8 * 64)
        state = self.project.factory.entry_state(
            stdin=password,
            add_options={angr.options.LAZY_SOLVES},
        )
        simgr = self.project.factory.simulation_manager(state)

        # Inspect every active state before each step.
        steps = 0
        while simgr.active and (max_steps is None or steps < max_steps):
            for active_state in simgr.active:
                if self.is_vulnerable_strcpy(active_state):
                    print(f"Found vulnerable strcpy at {hex(active_state.addr)}")
            simgr.step()
            steps += 1

        return self.vulnerable_paths
# Run the finder
finder = BufferOverflowFinder(project, cfg)
vulnerable = finder.find_overflows()

# Dump the concrete stdin (fd 0) contents for every recorded state.
for vuln_state in vulnerable:
    print("Vulnerable path found!")
    exploit = vuln_state.posix.dumps(0)
    print("Exploit payload:", exploit)
Buffer overflow detection can be slow on large binaries. Start with targeted analysis of specific functions.
Detecting Controlled Overwrites
Check if you can overwrite specific memory locations:
import angr
import claripy
def find_controlled_overwrite(project, target_addr, desired_value=0xdeadbeef, size=8):
    """Find if we can overwrite target_addr with controlled data.

    Args:
        project: The angr project to analyze.
        target_addr: Address of the memory slot to inspect.
        desired_value: Integer the slot should be forced to hold.
        size: Width of the slot in bytes.

    Returns:
        The stdin bytes that make the slot equal ``desired_value`` on some
        terminated path, or ``None`` if no such path was found.
    """
    state = project.factory.entry_state(
        stdin=claripy.BVS('input', 8 * 256)
    )
    simgr = project.factory.simulation_manager(state)
    simgr.explore()

    # Load with the architecture's memory endianness so desired_value is
    # compared as the integer the program itself would read from the slot.
    endness = project.arch.memory_endness

    for found_state in simgr.deadended:
        current_value = found_state.memory.load(target_addr, size, endness=endness)

        # Only a symbolic slot can be steered by the input.
        if not found_state.solver.symbolic(current_value):
            continue

        # Test feasibility first, so an unsatisfiable goal does not leave the
        # state permanently constrained.
        goal = current_value == desired_value
        if not found_state.solver.satisfiable(extra_constraints=[goal]):
            continue

        found_state.add_constraints(goal)
        exploit_input = found_state.posix.dumps(0)
        print(f"Can overwrite {hex(target_addr)} with {hex(desired_value)}")
        print(f"Input: {exploit_input}")
        return exploit_input

    return None


# Example usage
return_addr_location = 0x7fffffffe008  # Example stack location
exploit = find_controlled_overwrite(project, return_addr_location)
CGC Crash Identification
For DARPA Cyber Grand Challenge binaries:
import angr
from angr import sim_options as options
project = angr.Project("./CADET_00001", auto_load_libs=False)

# State options enabled for the crash hunt.
crash_options = {
    options.TRACK_ACTION_HISTORY,
    options.TRACK_CONSTRAINT_ACTIONS,
}

# 200 symbolic bytes on stdin.
symbolic_stdin = angr.claripy.BVS('input', 8 * 200)
state = project.factory.entry_state(
    add_options=crash_options,
    stdin=symbolic_stdin,
)

# Keep states with an unconstrained instruction pointer instead of discarding them.
simgr = project.factory.simulation_manager(state, save_unconstrained=True)

while simgr.active:
    simgr.step()

    # Keep stepping until something lands in the unconstrained stash.
    if not simgr.unconstrained:
        continue

    print("Found potential crash!")
    for crash_state in simgr.unconstrained:
        # Skip states whose instruction pointer is not symbolic.
        if not crash_state.solver.symbolic(crash_state.regs.pc):
            continue

        print("Controlled crash - PC is symbolic!")

        # Ask the solver whether PC can take a chosen value.
        crash_state.add_constraints(
            crash_state.regs.pc == 0x41424344
        )
        if not crash_state.solver.satisfiable():
            continue

        crashing_input = crash_state.posix.dumps(0)
        print("Crashing input:", crashing_input)
        print("Length:", len(crashing_input))
        break

    # Stop after the first batch of unconstrained states has been examined.
    break

print("Analysis complete")
CGC binaries use a simplified ABI. For regular Linux binaries, you’ll need to account for ASLR, stack canaries, and other protections.
Format String Vulnerability Detection
import angr
import claripy
def _direct_call_sites(project, cfg, target_addr):
    """Yield addresses of call instructions whose immediate operand is target_addr."""
    seen = set()
    for node in cfg.graph.nodes():
        # SimProcedure nodes have no real instructions to disassemble.
        if node.is_simprocedure:
            continue
        block = project.factory.block(node.addr)
        for insn in block.capstone.insns:
            if insn.mnemonic != 'call' or insn.address in seen:
                continue
            try:
                callee = int(insn.op_str, 16)
            except ValueError:
                # Indirect call (register/memory operand): target unknown here.
                continue
            if callee == target_addr:
                seen.add(insn.address)
                yield insn.address


def find_format_string_vuln(binary_path, input_size=64):
    """Detect format string vulnerabilities.

    For every direct call to a printf-family PLT entry, explores from the
    program entry point with symbolic stdin to the call site and reports the
    site when the format-string pointer, or the first byte it points to, is
    symbolic. Exploration is not bounded and may be slow on large binaries.

    Args:
        binary_path: Path of the binary to analyze.
        input_size: Number of symbolic stdin bytes supplied to the program.

    Returns:
        List of ``(function_name, plt_address)`` tuples for each
        printf-family function found in the main object's PLT.
    """
    project = angr.Project(binary_path, auto_load_libs=False)
    cfg = project.analyses.CFGFast()

    # Register holding the format-string pointer for each function
    # (x86-64 System V argument order: rdi, rsi, rdx, ...).
    format_arg_reg = {
        'printf': 'rdi',
        'fprintf': 'rsi',
        'sprintf': 'rsi',
        'snprintf': 'rdx',
        'dprintf': 'rsi',
    }

    # Find printf-family functions
    plt = project.loader.main_object.plt
    printf_calls = [(name, plt[name]) for name in format_arg_reg if name in plt]

    for func_name, plt_addr in printf_calls:
        print(f"Analyzing {func_name} at {hex(plt_addr)}...")
        fmt_reg = format_arg_reg[func_name]

        for call_addr in _direct_call_sites(project, cfg, plt_addr):
            # Drive real program input to the call site rather than
            # inventing an unconstrained pointer, which is always "controllable".
            state = project.factory.entry_state(
                stdin=claripy.BVS('input', 8 * input_size)
            )
            simgr = project.factory.simulation_manager(state)
            simgr.explore(find=call_addr)

            for found in simgr.found:
                fmt_ptr = getattr(found.regs, fmt_reg)
                controlled = found.solver.symbolic(fmt_ptr) or found.solver.symbolic(
                    found.memory.load(fmt_ptr, 1)
                )
                if controlled:
                    print(f"Potential format string vuln at {hex(call_addr)}")
                    print("Format argument is controllable")
                    break

    return printf_calls


# Run detection
vulns = find_format_string_vuln("./vulnerable_binary")
Automated Exploitation (Simple AEG)
Based on the Insomnihack AEG challenge:
import angr
import claripy
from angr import sim_options as options
class SimpleAEG:
    """Simple Automatic Exploit Generation"""

    def __init__(self, binary_path):
        """Load ``binary_path`` into an angr project without shared libraries."""
        self.project = angr.Project(binary_path, auto_load_libs=False)
        self.cfg = None
        self.vulnerable_functions = []

    def analyze(self):
        """Find vulnerabilities.

        Builds a fast CFG and records ``(name, address)`` for each dangerous
        function present in its knowledge base.

        Returns:
            True if at least one dangerous function was found.
        """
        print("[*] Building CFG...")
        self.cfg = self.project.analyses.CFGFast()

        print("[*] Finding dangerous functions...")
        dangerous = ['strcpy', 'gets', 'sprintf', 'scanf', 'vscanf']

        # Start fresh so repeated calls do not accumulate duplicates.
        self.vulnerable_functions = []
        for func_name in dangerous:
            if func_name in self.cfg.kb.functions:
                func = self.cfg.kb.functions[func_name]
                self.vulnerable_functions.append((func_name, func.addr))
                print(f"[+] Found {func_name} at {hex(func.addr)}")

        return bool(self.vulnerable_functions)

    def find_crash(self, max_steps=50, target_addr=0x41414141):
        """Find crashing input.

        Args:
            max_steps: Maximum number of simulation-manager steps to take.
            target_addr: Value the instruction pointer should be forced to.

        Returns:
            The stdin bytes that drive PC to ``target_addr``, or ``None`` if
            no such input was found within ``max_steps``.
        """
        print("[*] Searching for crash...")

        # Set up symbolic input: 100 individually named symbolic bytes.
        flag_chars = [claripy.BVS(f'flag_{i}', 8) for i in range(100)]
        flag = claripy.Concat(*flag_chars)

        state = self.project.factory.entry_state(
            stdin=flag,
            add_options={
                options.TRACK_ACTION_HISTORY,
            },
        )

        # Constrain to reasonable input (0x20..0x7e).
        for c in flag_chars:
            state.solver.add(c >= 0x20)
            state.solver.add(c <= 0x7e)

        simgr = self.project.factory.simulation_manager(
            state,
            save_unconstrained=True,
        )

        # Explore until we find a crash or run out of steps/states.
        for _ in range(max_steps):
            if not simgr.active:
                break
            simgr.step()

            if not simgr.unconstrained:
                continue

            print("[+] Found unconstrained state!")
            for crash_state in simgr.unconstrained:
                pc = crash_state.regs.pc

                # Check if we control the instruction pointer.
                if not crash_state.solver.symbolic(pc):
                    continue
                print("[+] We control PC!")

                # Test feasibility before committing the constraint, so a
                # failed attempt leaves the state untouched.
                hijack = pc == target_addr
                if crash_state.solver.satisfiable(extra_constraints=[hijack]):
                    crash_state.add_constraints(hijack)
                    exploit_input = crash_state.posix.dumps(0)
                    print("[+] Exploit generated!")
                    return exploit_input

            # Every state in the stash has been examined; drop them so later
            # steps only look at newly produced unconstrained states.
            simgr.drop(stash='unconstrained')

        print("[-] No crash found")
        return None

    def generate_exploit(self):
        """Full AEG pipeline.

        Returns:
            The exploit input from ``find_crash`` if analysis found dangerous
            functions and a crash was produced; otherwise ``None``.
        """
        if not self.analyze():
            print("[-] No vulnerable functions found")
            return None

        crash_input = self.find_crash()
        if crash_input:
            print("[+] Exploit:")
            print(crash_input)
            return crash_input

        return None
# Usage
aeg = SimpleAEG("./vulnerable_binary")
exploit = aeg.generate_exploit()

# Persist the payload only when one was produced.
if exploit:
    with open('exploit_payload.bin', 'wb') as f:
        f.write(exploit)
    print("[+] Exploit saved to exploit_payload.bin")
For real-world exploitation, you’ll need to handle ASLR, stack canaries, NX, and other modern protections. This example demonstrates the core concept.
Finding Integer Overflows
import angr
import claripy
def find_integer_overflow(project, allocation_func_addr):
    """Look for inputs that push an allocation size past 32 bits.

    Explores from the entry point with 32 symbolic stdin bytes until
    ``allocation_func_addr`` is reached, then tests whether the value in rdi
    can be made greater than 0xFFFFFFFF.

    Returns:
        The stdin bytes for the first such state, or ``None`` if there is none.
    """
    symbolic_stdin = claripy.BVS('input', 8 * 32)
    state = project.factory.entry_state(stdin=symbolic_stdin)
    simgr = project.factory.simulation_manager(state)

    # Run until a state arrives at the allocation function.
    simgr.explore(find=allocation_func_addr)

    for found_state in simgr.found:
        # Size argument (usually the first argument; rdi on x86-64).
        size_arg = found_state.regs.rdi
        if not found_state.solver.symbolic(size_arg):
            continue

        # Require the size to exceed the 32-bit range, then test feasibility.
        found_state.add_constraints(size_arg > 0xFFFFFFFF)
        if not found_state.solver.satisfiable():
            continue

        print("[+] Potential integer overflow")
        exploit = found_state.posix.dumps(0)
        return exploit

    return None
Grub “Back to 28” Bug Example
Based on the real vulnerability discovered at 32c3:
import angr
from angr import exploration_techniques as et
project = angr.Project("./crypto.mod", auto_load_libs=False)

# The vulnerability is in password checking.
# All three addresses are placeholders: replace them with the real
# locations in your binary before running.
password_check_addr = 0x1234  # Example address
success_addr = 0x1300  # Example address
failure_addr = 0x1400  # Example address


# The predicates are defined before explore() runs, because the simulation
# manager calls them while stepping.
def check_if_authenticated(state):
    """Return True when the state has reached the success address."""
    # Check memory or registers for success indicators
    return state.addr == success_addr


def check_if_failed(state):
    """Return True when the state has reached the failure address."""
    return state.addr == failure_addr


# Create symbolic password
password = angr.claripy.BVS('password', 8 * 100)
state = project.factory.entry_state(
    stdin=password
)

# Use exploration technique to prune state space
simgr = project.factory.simulation_manager(state)
simgr.use_technique(et.DFS())

# Look for states that crash or bypass authentication
simgr.explore(
    find=check_if_authenticated,
    avoid=check_if_failed,
)

if simgr.found:
    winning_state = simgr.found[0]
    exploit_password = winning_state.posix.dumps(0)
    print("Exploit password:", exploit_password)
Vulnerability Discovery Best Practices
Start with static analysis
Use CFG and function identification to find attack surface:
cfg = project.analyses.CFGFast()

# Addresses of the input-reading routines that the CFG recovered.
input_funcs = ['read', 'recv', 'fgets', 'scanf', 'getline']
entry_points = [
    cfg.kb.functions[func_name].addr
    for func_name in input_funcs
    if func_name in cfg.kb.functions
]
Use exploration techniques
Prevent state explosion:
from angr import exploration_techniques as et
# Register depth-first search first, then a loop bound of 5 iterations.
for technique in (et.DFS(), et.LoopSeer(bound=5)):
    simgr.use_technique(technique)