rustc_mir_transform/
simplify.rs

1//! A number of passes which remove various redundancies in the CFG.
2//!
3//! The `SimplifyCfg` pass gets rid of unnecessary blocks in the CFG, whereas the `SimplifyLocals`
4//! gets rid of all the unnecessary local variable declarations.
5//!
6//! The `SimplifyLocals` pass is kinda expensive and therefore not very suitable to be run often.
7//! Most of the passes should not care or be impacted in meaningful ways due to extra locals
8//! either, so running the pass once, right before codegen, should suffice.
9//!
10//! On the other side of the spectrum, the `SimplifyCfg` pass is considerably cheap to run, thus
11//! one should run it after every pass which may modify CFG in significant ways. This pass must
12//! also be run before any analysis passes because it removes dead blocks, and some of these can be
13//! ill-typed.
14//!
15//! The cause of this typing issue is typeck allowing most blocks whose end is not reachable have
16//! an arbitrary return type, rather than having the usual () return type (as a note, typeck's
17//! notion of reachability is in fact slightly weaker than MIR CFG reachability - see #31617). A
18//! standard example of the situation is:
19//!
20//! ```rust
21//!   fn example() {
22//!       let _a: char = { return; };
23//!   }
24//! ```
25//!
26//! Here the block (`{ return; }`) has the return type `char`, rather than `()`, but the MIR we
27//! naively generate still contains the `_a = ()` write in the unreachable block "after" the
28//! return.
29//!
30//! **WARNING**: This is one of the few optimizations that runs on built and analysis MIR, and
31//! so its effects may affect the type-checking, borrow-checking, and other analysis of MIR.
32//! We must be extremely careful to only apply optimizations that preserve UB and all
33//! non-determinism, since changes here can affect which programs compile in an insta-stable way.
34//! The normal logic that a program with UB can be changed to do anything does not apply to
35//! pre-"runtime" MIR!
36
37use rustc_index::{Idx, IndexSlice, IndexVec};
38use rustc_middle::mir::visit::{MutVisitor, MutatingUseContext, PlaceContext, Visitor};
39use rustc_middle::mir::*;
40use rustc_middle::ty::TyCtxt;
41use rustc_span::DUMMY_SP;
42use smallvec::SmallVec;
43use tracing::{debug, trace};
44
45pub(super) enum SimplifyCfg {
46    Initial,
47    PromoteConsts,
48    RemoveFalseEdges,
49    /// Runs at the beginning of "analysis to runtime" lowering, *before* drop elaboration.
50    PostAnalysis,
51    /// Runs at the end of "analysis to runtime" lowering, *after* drop elaboration.
52    /// This is before the main optimization passes on runtime MIR kick in.
53    PreOptimizations,
54    Final,
55    MakeShim,
56    AfterUnreachableEnumBranching,
57}
58
59impl SimplifyCfg {
60    fn name(&self) -> &'static str {
61        match self {
62            SimplifyCfg::Initial => "SimplifyCfg-initial",
63            SimplifyCfg::PromoteConsts => "SimplifyCfg-promote-consts",
64            SimplifyCfg::RemoveFalseEdges => "SimplifyCfg-remove-false-edges",
65            SimplifyCfg::PostAnalysis => "SimplifyCfg-post-analysis",
66            SimplifyCfg::PreOptimizations => "SimplifyCfg-pre-optimizations",
67            SimplifyCfg::Final => "SimplifyCfg-final",
68            SimplifyCfg::MakeShim => "SimplifyCfg-make_shim",
69            SimplifyCfg::AfterUnreachableEnumBranching => {
70                "SimplifyCfg-after-unreachable-enum-branching"
71            }
72        }
73    }
74}
75
76pub(super) fn simplify_cfg<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
77    if CfgSimplifier::new(tcx, body).simplify() {
78        // `simplify` returns that it changed something. We must invalidate the CFG caches as they
79        // are not consistent with the modified CFG any more.
80        body.basic_blocks.invalidate_cfg_cache();
81    }
82    remove_dead_blocks(body);
83
84    // FIXME: Should probably be moved into some kind of pass manager
85    body.basic_blocks.as_mut_preserves_cfg().shrink_to_fit();
86}
87
88impl<'tcx> crate::MirPass<'tcx> for SimplifyCfg {
89    fn name(&self) -> &'static str {
90        self.name()
91    }
92
93    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
94        debug!("SimplifyCfg({:?}) - simplifying {:?}", self.name(), body.source);
95        simplify_cfg(tcx, body);
96    }
97
98    fn is_required(&self) -> bool {
99        false
100    }
101}
102
103struct CfgSimplifier<'a, 'tcx> {
104    preserve_switch_reads: bool,
105    basic_blocks: &'a mut IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
106    pred_count: IndexVec<BasicBlock, u32>,
107}
108
109impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
110    fn new(tcx: TyCtxt<'tcx>, body: &'a mut Body<'tcx>) -> Self {
111        let mut pred_count = IndexVec::from_elem(0u32, &body.basic_blocks);
112
113        // we can't use mir.predecessors() here because that counts
114        // dead blocks, which we don't want to.
115        pred_count[START_BLOCK] = 1;
116
117        for (_, data) in traversal::preorder(body) {
118            if let Some(ref term) = data.terminator {
119                for tgt in term.successors() {
120                    pred_count[tgt] += 1;
121                }
122            }
123        }
124
125        // Preserve `SwitchInt` reads on built and analysis MIR, or if `-Zmir-preserve-ub`.
126        let preserve_switch_reads = matches!(body.phase, MirPhase::Built | MirPhase::Analysis(_))
127            || tcx.sess.opts.unstable_opts.mir_preserve_ub;
128        // Do not clear caches yet. The caller to `simplify` will do it if anything changed.
129        let basic_blocks = body.basic_blocks.as_mut_preserves_cfg();
130
131        CfgSimplifier { preserve_switch_reads, basic_blocks, pred_count }
132    }
133
134    /// Returns whether we actually simplified anything. In that case, the caller *must* invalidate
135    /// the CFG caches of the MIR body.
136    #[must_use]
137    fn simplify(mut self) -> bool {
138        self.strip_nops();
139
140        // Vec of the blocks that should be merged. We store the indices here, instead of the
141        // statements itself to avoid moving the (relatively) large statements twice.
142        // We do not push the statements directly into the target block (`bb`) as that is slower
143        // due to additional reallocations
144        let mut merged_blocks = Vec::new();
145        let mut outer_changed = false;
146        loop {
147            let mut changed = false;
148
149            for bb in self.basic_blocks.indices() {
150                if self.pred_count[bb] == 0 {
151                    continue;
152                }
153
154                debug!("simplifying {:?}", bb);
155
156                let mut terminator =
157                    self.basic_blocks[bb].terminator.take().expect("invalid terminator state");
158
159                terminator
160                    .successors_mut(|successor| self.collapse_goto_chain(successor, &mut changed));
161
162                let mut inner_changed = true;
163                merged_blocks.clear();
164                while inner_changed {
165                    inner_changed = false;
166                    inner_changed |= self.simplify_branch(&mut terminator);
167                    inner_changed |= self.merge_successor(&mut merged_blocks, &mut terminator);
168                    changed |= inner_changed;
169                }
170
171                let statements_to_merge =
172                    merged_blocks.iter().map(|&i| self.basic_blocks[i].statements.len()).sum();
173
174                if statements_to_merge > 0 {
175                    let mut statements = std::mem::take(&mut self.basic_blocks[bb].statements);
176                    statements.reserve(statements_to_merge);
177                    for &from in &merged_blocks {
178                        statements.append(&mut self.basic_blocks[from].statements);
179                    }
180                    self.basic_blocks[bb].statements = statements;
181                }
182
183                self.basic_blocks[bb].terminator = Some(terminator);
184            }
185
186            if !changed {
187                break;
188            }
189
190            outer_changed = true;
191        }
192
193        outer_changed
194    }
195
196    /// This function will return `None` if
197    /// * the block has statements
198    /// * the block has a terminator other than `goto`
199    /// * the block has no terminator (meaning some other part of the current optimization stole it)
200    fn take_terminator_if_simple_goto(&mut self, bb: BasicBlock) -> Option<Terminator<'tcx>> {
201        match self.basic_blocks[bb] {
202            BasicBlockData {
203                ref statements,
204                terminator:
205                    ref mut terminator @ Some(Terminator { kind: TerminatorKind::Goto { .. }, .. }),
206                ..
207            } if statements.is_empty() => terminator.take(),
208            // if `terminator` is None, this means we are in a loop. In that
209            // case, let all the loop collapse to its entry.
210            _ => None,
211        }
212    }
213
214    /// Collapse a goto chain starting from `start`
215    fn collapse_goto_chain(&mut self, start: &mut BasicBlock, changed: &mut bool) {
216        // Using `SmallVec` here, because in some logs on libcore oli-obk saw many single-element
217        // goto chains. We should probably benchmark different sizes.
218        let mut terminators: SmallVec<[_; 1]> = Default::default();
219        let mut current = *start;
220        while let Some(terminator) = self.take_terminator_if_simple_goto(current) {
221            let Terminator { kind: TerminatorKind::Goto { target }, .. } = terminator else {
222                unreachable!();
223            };
224            terminators.push((current, terminator));
225            current = target;
226        }
227        let last = current;
228        *changed |= *start != last;
229        *start = last;
230        while let Some((current, mut terminator)) = terminators.pop() {
231            let Terminator { kind: TerminatorKind::Goto { ref mut target }, .. } = terminator
232            else {
233                unreachable!();
234            };
235            *changed |= *target != last;
236            *target = last;
237            debug!("collapsing goto chain from {:?} to {:?}", current, target);
238
239            if self.pred_count[current] == 1 {
240                // This is the last reference to current, so the pred-count to
241                // to target is moved into the current block.
242                self.pred_count[current] = 0;
243            } else {
244                self.pred_count[*target] += 1;
245                self.pred_count[current] -= 1;
246            }
247            self.basic_blocks[current].terminator = Some(terminator);
248        }
249    }
250
251    // merge a block with 1 `goto` predecessor to its parent
252    fn merge_successor(
253        &mut self,
254        merged_blocks: &mut Vec<BasicBlock>,
255        terminator: &mut Terminator<'tcx>,
256    ) -> bool {
257        let target = match terminator.kind {
258            TerminatorKind::Goto { target } if self.pred_count[target] == 1 => target,
259            _ => return false,
260        };
261
262        debug!("merging block {:?} into {:?}", target, terminator);
263        *terminator = match self.basic_blocks[target].terminator.take() {
264            Some(terminator) => terminator,
265            None => {
266                // unreachable loop - this should not be possible, as we
267                // don't strand blocks, but handle it correctly.
268                return false;
269            }
270        };
271
272        merged_blocks.push(target);
273        self.pred_count[target] = 0;
274
275        true
276    }
277
278    // turn a branch with all successors identical to a goto
279    fn simplify_branch(&mut self, terminator: &mut Terminator<'tcx>) -> bool {
280        // Removing a `SwitchInt` terminator may remove reads that result in UB,
281        // so we must not apply this optimization before borrowck or when
282        // `-Zmir-preserve-ub` is set.
283        if self.preserve_switch_reads {
284            return false;
285        }
286
287        let TerminatorKind::SwitchInt { .. } = terminator.kind else {
288            return false;
289        };
290
291        let first_succ = {
292            if let Some(first_succ) = terminator.successors().next() {
293                if terminator.successors().all(|s| s == first_succ) {
294                    let count = terminator.successors().count();
295                    self.pred_count[first_succ] -= (count - 1) as u32;
296                    first_succ
297                } else {
298                    return false;
299                }
300            } else {
301                return false;
302            }
303        };
304
305        debug!("simplifying branch {:?}", terminator);
306        terminator.kind = TerminatorKind::Goto { target: first_succ };
307        true
308    }
309
310    fn strip_nops(&mut self) {
311        for blk in self.basic_blocks.iter_mut() {
312            blk.statements.retain(|stmt| !matches!(stmt.kind, StatementKind::Nop))
313        }
314    }
315}
316
317pub(super) fn simplify_duplicate_switch_targets(terminator: &mut Terminator<'_>) {
318    if let TerminatorKind::SwitchInt { targets, .. } = &mut terminator.kind {
319        let otherwise = targets.otherwise();
320        if targets.iter().any(|t| t.1 == otherwise) {
321            *targets = SwitchTargets::new(
322                targets.iter().filter(|t| t.1 != otherwise),
323                targets.otherwise(),
324            );
325        }
326    }
327}
328
329pub(super) fn remove_dead_blocks(body: &mut Body<'_>) {
330    let should_deduplicate_unreachable = |bbdata: &BasicBlockData<'_>| {
331        // CfgSimplifier::simplify leaves behind some unreachable basic blocks without a
332        // terminator. Those blocks will be deleted by remove_dead_blocks, but we run just
333        // before then so we need to handle missing terminators.
334        // We also need to prevent confusing cleanup and non-cleanup blocks. In practice we
335        // don't emit empty unreachable cleanup blocks, so this simple check suffices.
336        bbdata.terminator.is_some() && bbdata.is_empty_unreachable() && !bbdata.is_cleanup
337    };
338
339    let reachable = traversal::reachable_as_bitset(body);
340    let empty_unreachable_blocks = body
341        .basic_blocks
342        .iter_enumerated()
343        .filter(|(bb, bbdata)| should_deduplicate_unreachable(bbdata) && reachable.contains(*bb))
344        .count();
345
346    let num_blocks = body.basic_blocks.len();
347    if num_blocks == reachable.count() && empty_unreachable_blocks <= 1 {
348        return;
349    }
350
351    let basic_blocks = body.basic_blocks.as_mut();
352
353    let mut replacements: Vec<_> = (0..num_blocks).map(BasicBlock::new).collect();
354    let mut orig_index = 0;
355    let mut used_index = 0;
356    let mut kept_unreachable = None;
357    let mut deduplicated_unreachable = false;
358    basic_blocks.raw.retain(|bbdata| {
359        let orig_bb = BasicBlock::new(orig_index);
360        if !reachable.contains(orig_bb) {
361            orig_index += 1;
362            return false;
363        }
364
365        let used_bb = BasicBlock::new(used_index);
366        if should_deduplicate_unreachable(bbdata) {
367            let kept_unreachable = *kept_unreachable.get_or_insert(used_bb);
368            if kept_unreachable != used_bb {
369                replacements[orig_index] = kept_unreachable;
370                deduplicated_unreachable = true;
371                orig_index += 1;
372                return false;
373            }
374        }
375
376        replacements[orig_index] = used_bb;
377        used_index += 1;
378        orig_index += 1;
379        true
380    });
381
382    // If we deduplicated unreachable blocks we erase their source_info as we
383    // can no longer attribute their code to a particular location in the
384    // source.
385    if deduplicated_unreachable {
386        basic_blocks[kept_unreachable.unwrap()].terminator_mut().source_info =
387            SourceInfo { span: DUMMY_SP, scope: OUTERMOST_SOURCE_SCOPE };
388    }
389
390    for block in basic_blocks {
391        block.terminator_mut().successors_mut(|target| *target = replacements[target.index()]);
392    }
393}
394
395pub(super) enum SimplifyLocals {
396    BeforeConstProp,
397    AfterGVN,
398    Final,
399}
400
401impl<'tcx> crate::MirPass<'tcx> for SimplifyLocals {
402    fn name(&self) -> &'static str {
403        match &self {
404            SimplifyLocals::BeforeConstProp => "SimplifyLocals-before-const-prop",
405            SimplifyLocals::AfterGVN => "SimplifyLocals-after-value-numbering",
406            SimplifyLocals::Final => "SimplifyLocals-final",
407        }
408    }
409
410    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
411        sess.mir_opt_level() > 0
412    }
413
414    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
415        trace!("running SimplifyLocals on {:?}", body.source);
416
417        // First, we're going to get a count of *actual* uses for every `Local`.
418        let mut used_locals = UsedLocals::new(body);
419
420        // Next, we're going to remove any `Local` with zero actual uses. When we remove those
421        // `Locals`, we're also going to subtract any uses of other `Locals` from the `used_locals`
422        // count. For example, if we removed `_2 = discriminant(_1)`, then we'll subtract one from
423        // `use_counts[_1]`. That in turn might make `_1` unused, so we loop until we hit a
424        // fixedpoint where there are no more unused locals.
425        remove_unused_definitions_helper(&mut used_locals, body);
426
427        // Finally, we'll actually do the work of shrinking `body.local_decls` and remapping the
428        // `Local`s.
429        let map = make_local_map(&mut body.local_decls, &used_locals);
430
431        // Only bother running the `LocalUpdater` if we actually found locals to remove.
432        if map.iter().any(Option::is_none) {
433            // Update references to all vars and tmps now
434            let mut updater = LocalUpdater { map, tcx };
435            updater.visit_body_preserves_cfg(body);
436
437            body.local_decls.shrink_to_fit();
438        }
439    }
440
441    fn is_required(&self) -> bool {
442        false
443    }
444}
445
446pub(super) fn remove_unused_definitions<'tcx>(body: &mut Body<'tcx>) {
447    // First, we're going to get a count of *actual* uses for every `Local`.
448    let mut used_locals = UsedLocals::new(body);
449
450    // Next, we're going to remove any `Local` with zero actual uses. When we remove those
451    // `Locals`, we're also going to subtract any uses of other `Locals` from the `used_locals`
452    // count. For example, if we removed `_2 = discriminant(_1)`, then we'll subtract one from
453    // `use_counts[_1]`. That in turn might make `_1` unused, so we loop until we hit a
454    // fixedpoint where there are no more unused locals.
455    remove_unused_definitions_helper(&mut used_locals, body);
456}
457
458/// Construct the mapping while swapping out unused stuff out from the `vec`.
459fn make_local_map<V>(
460    local_decls: &mut IndexVec<Local, V>,
461    used_locals: &UsedLocals,
462) -> IndexVec<Local, Option<Local>> {
463    let mut map: IndexVec<Local, Option<Local>> = IndexVec::from_elem(None, local_decls);
464    let mut used = Local::ZERO;
465
466    for alive_index in local_decls.indices() {
467        // `is_used` treats the `RETURN_PLACE` and arguments as used.
468        if !used_locals.is_used(alive_index) {
469            continue;
470        }
471
472        map[alive_index] = Some(used);
473        if alive_index != used {
474            local_decls.swap(alive_index, used);
475        }
476        used.increment_by(1);
477    }
478    local_decls.truncate(used.index());
479    map
480}
481
482/// Keeps track of used & unused locals.
483struct UsedLocals {
484    increment: bool,
485    arg_count: u32,
486    use_count: IndexVec<Local, u32>,
487}
488
489impl UsedLocals {
490    /// Determines which locals are used & unused in the given body.
491    fn new(body: &Body<'_>) -> Self {
492        let mut this = Self {
493            increment: true,
494            arg_count: body.arg_count.try_into().unwrap(),
495            use_count: IndexVec::from_elem(0, &body.local_decls),
496        };
497        this.visit_body(body);
498        this
499    }
500
501    /// Checks if local is used.
502    ///
503    /// Return place and arguments are always considered used.
504    fn is_used(&self, local: Local) -> bool {
505        trace!("is_used({:?}): use_count: {:?}", local, self.use_count[local]);
506        local.as_u32() <= self.arg_count || self.use_count[local] != 0
507    }
508
509    /// Updates the use counts to reflect the removal of given statement.
510    fn statement_removed(&mut self, statement: &Statement<'_>) {
511        self.increment = false;
512
513        // The location of the statement is irrelevant.
514        let location = Location::START;
515        self.visit_statement(statement, location);
516    }
517
518    /// Visits a left-hand side of an assignment.
519    fn visit_lhs(&mut self, place: &Place<'_>, location: Location) {
520        if place.is_indirect() {
521            // A use, not a definition.
522            self.visit_place(place, PlaceContext::MutatingUse(MutatingUseContext::Store), location);
523        } else {
524            // A definition. The base local itself is not visited, so this occurrence is not counted
525            // toward its use count. There might be other locals still, used in an indexing
526            // projection.
527            self.super_projection(
528                place.as_ref(),
529                PlaceContext::MutatingUse(MutatingUseContext::Projection),
530                location,
531            );
532        }
533    }
534}
535
536impl<'tcx> Visitor<'tcx> for UsedLocals {
537    fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
538        match statement.kind {
539            StatementKind::Intrinsic(..)
540            | StatementKind::Retag(..)
541            | StatementKind::Coverage(..)
542            | StatementKind::FakeRead(..)
543            | StatementKind::PlaceMention(..)
544            | StatementKind::AscribeUserType(..) => {
545                self.super_statement(statement, location);
546            }
547
548            StatementKind::ConstEvalCounter | StatementKind::Nop => {}
549
550            StatementKind::StorageLive(_local) | StatementKind::StorageDead(_local) => {}
551
552            StatementKind::Assign(box (ref place, ref rvalue)) => {
553                if rvalue.is_safe_to_remove() {
554                    self.visit_lhs(place, location);
555                    self.visit_rvalue(rvalue, location);
556                } else {
557                    self.super_statement(statement, location);
558                }
559            }
560
561            StatementKind::SetDiscriminant { ref place, variant_index: _ }
562            | StatementKind::Deinit(ref place)
563            | StatementKind::BackwardIncompatibleDropHint { ref place, reason: _ } => {
564                self.visit_lhs(place, location);
565            }
566        }
567    }
568
569    fn visit_local(&mut self, local: Local, _ctx: PlaceContext, _location: Location) {
570        if self.increment {
571            self.use_count[local] += 1;
572        } else {
573            assert_ne!(self.use_count[local], 0);
574            self.use_count[local] -= 1;
575        }
576    }
577}
578
579/// Removes unused definitions. Updates the used locals to reflect the changes made.
580fn remove_unused_definitions_helper(used_locals: &mut UsedLocals, body: &mut Body<'_>) {
581    // The use counts are updated as we remove the statements. A local might become unused
582    // during the retain operation, leading to a temporary inconsistency (storage statements or
583    // definitions referencing the local might remain). For correctness it is crucial that this
584    // computation reaches a fixed point.
585
586    let mut modified = true;
587    while modified {
588        modified = false;
589
590        for data in body.basic_blocks.as_mut_preserves_cfg() {
591            // Remove unnecessary StorageLive and StorageDead annotations.
592            data.statements.retain(|statement| {
593                let keep = match &statement.kind {
594                    StatementKind::StorageLive(local) | StatementKind::StorageDead(local) => {
595                        used_locals.is_used(*local)
596                    }
597                    StatementKind::Assign(box (place, _)) => used_locals.is_used(place.local),
598
599                    StatementKind::SetDiscriminant { place, .. }
600                    | StatementKind::BackwardIncompatibleDropHint { place, reason: _ }
601                    | StatementKind::Deinit(place) => used_locals.is_used(place.local),
602                    StatementKind::Nop => false,
603                    _ => true,
604                };
605
606                if !keep {
607                    trace!("removing statement {:?}", statement);
608                    modified = true;
609                    used_locals.statement_removed(statement);
610                }
611
612                keep
613            });
614        }
615    }
616}
617
618struct LocalUpdater<'tcx> {
619    map: IndexVec<Local, Option<Local>>,
620    tcx: TyCtxt<'tcx>,
621}
622
623impl<'tcx> MutVisitor<'tcx> for LocalUpdater<'tcx> {
624    fn tcx(&self) -> TyCtxt<'tcx> {
625        self.tcx
626    }
627
628    fn visit_local(&mut self, l: &mut Local, _: PlaceContext, _: Location) {
629        *l = self.map[*l].unwrap();
630    }
631}