Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/angr/angr/llms.txt

Use this file to discover all available pages before exploring further.

angr provides two complementary data-flow analyses: DDG (Data Dependence Graph) for fast dependency tracking and VFG (Value-Flow Graph) for precise abstract interpretation.

Overview

Data-flow analysis tracks how data moves through a program:
  • DDG: Fast, CFG-based data dependence tracking
  • VFG: Precise value-flow analysis with abstract interpretation

DDG: Data Dependence Graph

The DDG analysis builds a graph showing data dependencies between program statements.

Basic Usage

import angr

# Load binary and generate CFG
p = angr.Project('/bin/example', load_options={'auto_load_libs': False})

# DDG requires CFGEmulated with specific options
cfg = p.analyses.CFGEmulated(
    keep_state=True,
    state_add_options={angr.options.TRACK_MEMORY_ACTIONS}
)

# Generate DDG
ddg = p.analyses.DDG(cfg)

# Access the graphs
print(f"Statement graph: {ddg.graph}")
print(f"Data graph: {ddg.data_graph}")
print(f"Simplified: {ddg.simplified_data_graph}")
Important: DDG requires a CFGEmulated with:
  • keep_state=True to preserve states
  • state_add_options={angr.options.TRACK_MEMORY_ACTIONS} to record memory accesses (pass angr.options.refs instead to also record register and temporary-variable accesses)
DDG is fast but not sound - use VFG for more accurate analysis.

DDG Configuration

ddg = p.analyses.DDG(
    cfg,
    
    # Start address for analysis
    start=0x400000,
    
    # Limit call depth (None = unlimited)
    call_depth=5,
    
    # Only analyze specific blocks
    block_addrs={0x400000, 0x400100},
)

Understanding DDG

DDG tracks three types of dependencies:
1. Temporary Variables

Dependencies within a single basic block:
# Example: t0 = reg_eax; t1 = t0 + 1
# t1 depends on t0
2. Register Dependencies

Cross-block register dependencies:
# Block A: eax = 5
# Block B: ebx = eax
# ebx in B depends on eax in A
3. Memory Dependencies

Stack and memory dependencies:
# Block A: store [esp+4], eax
# Block B: ebx = load [esp+4]
# Load in B depends on store in A

Accessing DDG Data

Graph of dependencies between statements:
import networkx as nx

# CodeLocation is needed to build the lookup key used below
from angr.code_location import CodeLocation

# Walk every edge of the statement dependence graph; each edge carries
# a 'type' and a 'data' attribute describing the dependency.
for src, dst, data in ddg.graph.edges(data=True):
    print(f"{src} -> {dst}")
    print(f"  Type: {data.get('type')}")
    print(f"  Data: {data.get('data')}")

# Find predecessors (what this depends on).
# A CodeLocation is built from (block address, statement index).
location = CodeLocation(0x400000, 5)
if location in ddg:
    preds = ddg.get_predecessors(location)
    print(f"Dependencies: {preds}")

DDG Graph Types

# Statement dependence graph
# Nodes: CodeLocation objects
# Edges: type (reg/mem/tmp), data
for src, dst in ddg.graph.edges():
    print(f"{src.block_addr:#x}:{src.stmt_idx} -> "
          f"{dst.block_addr:#x}:{dst.stmt_idx}")

# Data dependence graph  
# Nodes: ProgramVariable objects
# Edges: type (mem_addr/mem_data/kill)
for node in ddg.data_graph.nodes():
    print(f"Var: {node.variable} @ {node.location}")

# Simplified data graph
# Removes transitive edges and temporaries
simple = ddg.simplified_data_graph
print(f"Simplified: {len(simple.nodes())} nodes")

Example: Backward Slicing with DDG

from angr.code_location import CodeLocation
from angr.sim_variable import SimRegisterVariable

def get_dependencies(ddg, addr, stmt_idx, reg_name):
    """Print and return the data-dependence subgraph of one register.

    Args:
        ddg: A DDG analysis result; its ``project.arch`` is used to
            resolve the register.
        addr: Block address of the code location of interest.
        stmt_idx: Statement index inside that block.
        reg_name: Register name, a key of ``ddg.project.arch.registers``.

    Returns:
        The simplified data subgraph rooted at the register variable.
    """
    arch = ddg.project.arch

    # Each registers entry is indexed as [0] = offset, [1] = size.
    offset = arch.registers[reg_name][0]
    size = arch.registers[reg_name][1]

    # Pair the register variable with the place in the program we ask about.
    reg_var = SimRegisterVariable(offset, size)
    where = CodeLocation(addr, stmt_idx)

    from angr.analyses.ddg import ProgramVariable
    root = ProgramVariable(reg_var, where, arch=arch)

    # Everything the variable depends on, taken from the simplified graph.
    deps = ddg.data_sub_graph(root, simplified=True)

    print(f"Dependencies for {reg_name} at {addr:#x}:{stmt_idx}:")
    for dep in deps.nodes():
        print(f"  {dep}")

    return deps

# Usage
ddg = p.analyses.DDG(cfg)
subgraph = get_dependencies(ddg, 0x400100, 10, 'eax')

VFG: Value-Flow Graph

The VFG analysis performs abstract interpretation to track value flow with higher precision.

Basic Usage

# VFG can generate its own CFG or use existing
vfg = p.analyses.VFG(
    # Optional: provide CFGEmulated
    cfg=None,
    
    # Function to analyze
    function_start=0x400000,
    
    # Context sensitivity (0-infinity)
    context_sensitivity_level=2,
)

# Access the graph
print(f"VFG nodes: {len(vfg.graph.nodes())}")
for node in vfg.graph.nodes():
    print(f"VFGNode at {node.addr:#x}")

VFG Configuration

# NOTE: `cfg` and `my_state` are placeholders that this snippet does not
# define; create them beforehand (or pass None / omit the argument).
vfg = p.analyses.VFG(
    # Existing CFG (or None to generate)
    cfg=cfg,
    
    # Context sensitivity level
    context_sensitivity_level=2,
    
    # Starting address
    # (both start and function_start are set to the same address here)
    start=0x400000,
    function_start=0x400000,
    
    # Interfunction analysis level
    interfunction_level=1,
    
    # Custom initial state
    initial_state=my_state,
    
    # Addresses to avoid
    avoid_runs=[0x400500],
    
    # Widening/narrowing parameters
    max_iterations=40,
    max_iterations_before_widening=8,
    widening_interval=3,
)

VFG Features

1. Abstract Interpretation

VFG uses abstract interpretation with:
  • State merging at merge points
  • Widening for loop convergence
  • Narrowing for precision
2. Context Sensitivity

Tracks calling contexts for precision:
  • Level 0: Context-insensitive
  • Level 1: One caller
  • Level 2+: Multiple callers
3. Interfunction Analysis

Analyzes across function calls:
# Level 0: No interprocedural
# Level 1: One level of calls
# Level N: N levels deep
vfg = p.analyses.VFG(
    function_start=main_addr,
    interfunction_level=2
)

Accessing VFG Data

import networkx as nx

# Get one node at the address (any context); the result may be falsy
# when there is no such node, hence the check below
node = vfg.get_any_node(0x400000)
if node:
    print(f"Node: {node}")
    print(f"State: {node.state}")
    print(f"Final states: {node.final_states}")

# Get all nodes at address (different contexts)
for node in vfg.get_all_nodes(0x400000):
    print(f"Context: {node.key}")

# Access per-function final states, keyed by function address and then
# by calling context.
# NOTE(review): presumably this mapping is only populated when the VFG
# is created with record_function_final_states=True - confirm against
# the VFG API before relying on it.
for func_addr, states in vfg.function_final_states.items():
    print(f"Function {func_addr:#x} final states:")
    for context, state in states.items():
        print(f"  Context {context}: {state}")

# Analyze the graph: count incoming and outgoing edges of every node
for node in vfg.graph.nodes():
    preds = list(vfg.graph.predecessors(node))
    succs = list(vfg.graph.successors(node))
    print(f"{node.addr:#x}: {len(preds)} preds, {len(succs)} succs")

VFGNode Properties

node = vfg.get_any_node(0x400000)

if node:
    # Basic properties
    print(f"Address: {node.addr:#x}")
    print(f"Block ID: {node.key}")
    
    # States
    print(f"Current state: {node.state}")
    print(f"All states: {len(node.all_states)}")
    print(f"Final states: {len(node.final_states)}")
    
    # Widening info
    print(f"Widened: {node.widened_state is not None}")
    print(f"Narrowing times: {node.narrowing_times}")
    
    # Analysis artifacts
    print(f"Actions: {node.actions}")
    print(f"Events: {node.events}")

Example: Tracking Value Flow

def track_register_flow(project, function_addr, reg_name):
    """Print the value of a register at every VFG node of a function.

    Args:
        project: The angr project to analyze.
        function_addr: Start address of the function; passed to VFG as
            ``function_start``.
        reg_name: Register name, a key of ``project.arch.registers``
            (for example ``'eax'``).

    Raises:
        KeyError: If ``reg_name`` is not a known register name.
    """
    # Generate VFG
    vfg = project.analyses.VFG(
        function_start=function_addr,
        context_sensitivity_level=1
    )

    # The registers entry holds both the offset and the size; the size is
    # needed so that sub-registers (e.g. eax inside rax) are read with
    # their own width instead of an implicit default.
    reg_info = project.arch.registers[reg_name]
    reg_offset, reg_size = reg_info[0], reg_info[1]

    print(f"Tracking {reg_name} in function {function_addr:#x}")

    # Examine each node in address order
    for node in sorted(vfg.graph.nodes(), key=lambda n: n.addr):
        if not node.state:
            # No state recorded for this node; nothing to report
            continue

        reg_val = node.state.registers.load(reg_offset, reg_size)

        # Symbolic values are printed as-is; concrete ones are evaluated
        if reg_val.symbolic:
            print(f"  {node.addr:#x}: symbolic - {reg_val}")
        else:
            concrete = node.state.solver.eval(reg_val)
            print(f"  {node.addr:#x}: concrete - {concrete:#x}")

# Usage  
track_register_flow(p, 0x400000, 'eax')

Comparing DDG vs VFG

| Feature | DDG | VFG |
| --- | --- | --- |
| Speed | ⚡ Fast | 🐌 Slow |
| Precision | Approximate | High (with widening) |
| Soundness | ❌ Not guaranteed | ✅ Sound (with caveats) |
| Requirements | CFGEmulated with states | Can generate own CFG |
| Use Case | Quick dependency analysis | Precise value tracking |
Choose DDG when: You need fast dependency tracking for backward slicing or quick analysis.

Choose VFG when: You need precise value-flow information with abstract interpretation.

Example: Combined Analysis

import angr
import networkx as nx

def analyze_data_flow(binary_path, function_name):
    """Run a DDG and a VFG over one function and print a short summary.

    Args:
        binary_path: Path of the binary to load (libraries are not
            auto-loaded).
        function_name: Name of the function, looked up in the functions
            recovered by CFGFast.

    Returns:
        A ``(ddg, vfg)`` tuple with both analysis results.
    """
    # Setup: load the binary and locate the function via a fast CFG
    project = angr.Project(binary_path, load_options={'auto_load_libs': False})
    fast_cfg = project.analyses.CFGFast()
    target = fast_cfg.kb.functions[function_name]

    print(f"=== Analyzing {function_name} ===")

    # Step 1: DDG, built on an emulated CFG that keeps its states
    print("\n[1] Generating DDG for quick dependency view...")
    emulated_options = dict(
        starts=[target.addr],
        keep_state=True,
        state_add_options={angr.options.TRACK_MEMORY_ACTIONS},
        call_depth=1,
    )
    cfg_e = project.analyses.CFGEmulated(**emulated_options)
    ddg = project.analyses.DDG(cfg_e, start=target.addr)

    print(f"  Nodes: {len(ddg.data_graph.nodes())}")
    print(f"  Edges: {len(ddg.data_graph.edges())}")

    # Report definitions for the first five CFG nodes only
    ddg_view = ddg.view
    sample_blocks = list(cfg_e.graph.nodes())[:5]
    for block in sample_blocks:
        found = ddg_view[block.addr].definitions
        if not found:
            continue
        print(f"  Definitions at {block.addr:#x}: {len(found)}")

    # Step 2: VFG restricted to the function itself
    print("\n[2] Generating VFG for precise value flow...")
    vfg_options = dict(
        function_start=target.addr,
        context_sensitivity_level=1,
        interfunction_level=0,
    )
    vfg = project.analyses.VFG(**vfg_options)

    print(f"  VFG Nodes: {len(vfg.graph.nodes())}")

    # Flag the first five VFG nodes that carry a state
    sample_nodes = list(vfg.graph.nodes())[:5]
    for vnode in sample_nodes:
        if vnode.state:
            print(f"  State at {vnode.addr:#x}: tracked")

    return ddg, vfg

# Usage
ddg, vfg = analyze_data_flow('/bin/ls', 'main')

Next Steps

Variable Recovery

Identify variables using data flow

CFG Analysis

Control-flow graph generation

Backward Slicing

DDG can be used for backward slicing with the BackwardSlice analysis:
import angr

p = angr.Project('binary')

# DDG needs a CFGEmulated that keeps its states and records accesses;
# CFGFast does not provide them. angr.options.refs enables memory,
# register and temporary-variable action tracking.
cfg = p.analyses.CFGEmulated(
    keep_state=True,
    state_add_options=angr.options.refs,
    context_sensitivity_level=2,
)

# Slice targets are (CFGNode, statement_index) pairs, so resolve the
# address to its CFG node first.
target_node = cfg.get_any_node(0x400500)
if target_node is None:
    raise ValueError("0x400500 is not the start of a block in the CFG")

# Create a backward slice from the target
slice_analysis = p.analyses.BackwardSlice(
    cfg=cfg,
    cdg=p.analyses.CDG(cfg),
    ddg=p.analyses.DDG(cfg),
    targets=[(target_node, 0)],  # (CFG node, statement_index)
)

# Get the slice - chosen_statements maps each block address to the
# statement indices in that block that affect the target
for addr, stmt_ids in sorted(slice_analysis.chosen_statements.items()):
    print(f"0x{addr:x}: statements {sorted(stmt_ids)}")
The backward slice identifies all program statements that could affect the values at the target location.