//! Store the provenance for each byte in the range, with a more efficient
//! representation for the common case where PTR_SIZE consecutive bytes have the same provenance.
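//!
//! For example, writing a pointer at offset 16 creates a single ptr-sized entry covering bytes
//! `16..16+PTR_SIZE`; overwriting just byte 18 of it (only possible when `Prov::OFFSET_IS_ADDR`)
//! turns the remaining bytes into per-byte entries.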

use std::cmp;

use rustc_data_structures::sorted_map::SortedMap;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use rustc_target::abi::{HasDataLayout, Size};

use super::{alloc_range, AllocError, AllocId, AllocRange, AllocResult, Provenance};

/// Stores the provenance information of pointers stored in memory.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
#[derive(HashStable)]
pub struct ProvenanceMap<Prov = AllocId> {
    /// Provenance in this map applies from the given offset for an entire pointer-size worth of
    /// bytes. Two entries in this map are always at least a pointer size apart.
    ptrs: SortedMap<Size, Prov>,
    /// Provenance in this map only applies to the given single byte.
    /// This map is disjoint from the previous. It will always be empty when
    /// `Prov::OFFSET_IS_ADDR` is false.
    bytes: Option<Box<SortedMap<Size, Prov>>>,
}

impl<D: Decoder, Prov: Provenance + Decodable<D>> Decodable<D> for ProvenanceMap<Prov> {
    fn decode(d: &mut D) -> Self {
        assert!(!Prov::OFFSET_IS_ADDR); // only `AllocId` is ever serialized
        Self { ptrs: Decodable::decode(d), bytes: None }
    }
}

impl<S: Encoder, Prov: Provenance + Encodable<S>> Encodable<S> for ProvenanceMap<Prov> {
    fn encode(&self, s: &mut S) {
        let Self { ptrs, bytes } = self;
        assert!(!Prov::OFFSET_IS_ADDR); // only `AllocId` is ever serialized
        debug_assert!(bytes.is_none());
        ptrs.encode(s)
    }
}

impl<Prov> ProvenanceMap<Prov> {
    pub fn new() -> Self {
        ProvenanceMap { ptrs: SortedMap::new(), bytes: None }
    }

    /// The caller must guarantee that the given provenance list is already sorted
    /// by address and contains no duplicates.
    pub fn from_presorted_ptrs(r: Vec<(Size, Prov)>) -> Self {
        ProvenanceMap { ptrs: SortedMap::from_presorted_elements(r), bytes: None }
    }
}

impl ProvenanceMap {
    /// Give access to the ptr-sized provenances (which can also be thought of as relocations, and
    /// indeed that is how codegen treats them).
    ///
    /// Only exposed with `AllocId` provenance, since it panics if there is bytewise provenance.
    #[inline]
    pub fn ptrs(&self) -> &SortedMap<Size, AllocId> {
        debug_assert!(self.bytes.is_none()); // `AllocId::OFFSET_IS_ADDR` is false so this cannot fail
        &self.ptrs
    }
}

impl<Prov: Provenance> ProvenanceMap<Prov> {
    /// Returns all ptr-sized provenance in the given range.
    /// If the range has length 0, returns provenance that crosses the edge between `start-1` and
    /// `start`.
    pub(super) fn range_get_ptrs(
        &self,
        range: AllocRange,
        cx: &impl HasDataLayout,
    ) -> &[(Size, Prov)] {
        // We have to go back `pointer_size - 1` bytes, as that one would still overlap with
        // the beginning of this range.
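        // E.g., with 8-byte pointers, a pointer stored at offset 2 covers bytes 2..10, so a
        // query for a range starting at offset 9 must look back to offset `9 - 7 = 2`.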
        let adjusted_start = Size::from_bytes(
            range.start.bytes().saturating_sub(cx.data_layout().pointer_size.bytes() - 1),
        );
        self.ptrs.range(adjusted_start..range.end())
    }

    /// Returns all byte-wise provenance in the given range.
    fn range_get_bytes(&self, range: AllocRange) -> &[(Size, Prov)] {
        if let Some(bytes) = self.bytes.as_ref() {
            bytes.range(range.start..range.end())
        } else {
            &[]
        }
    }

    /// Get the provenance of a single byte.
    pub fn get(&self, offset: Size, cx: &impl HasDataLayout) -> Option<Prov> {
        let prov = self.range_get_ptrs(alloc_range(offset, Size::from_bytes(1)), cx);
        debug_assert!(prov.len() <= 1);
        if let Some(entry) = prov.first() {
            // If it overlaps with this byte, it is on this byte.
            debug_assert!(self.bytes.as_ref().map_or(true, |b| b.get(&offset).is_none()));
            Some(entry.1)
        } else {
            // Look up per-byte provenance.
            self.bytes.as_ref().and_then(|b| b.get(&offset).copied())
        }
    }

    /// Checks if there is ptr-sized provenance at the given offset.
    /// This says nothing about bytewise provenance, but can be useful as an optimization.
    pub fn get_ptr(&self, offset: Size) -> Option<Prov> {
        self.ptrs.get(&offset).copied()
    }

    /// Returns whether this allocation has provenance overlapping with the given range.
    ///
    /// Note: this function exists so that `range_get_ptrs` and `range_get_bytes` do not have to
    /// be public, in order to somewhat limit access to provenance outside of the `Allocation`
    /// abstraction.
    pub fn range_empty(&self, range: AllocRange, cx: &impl HasDataLayout) -> bool {
        self.range_get_ptrs(range, cx).is_empty() && self.range_get_bytes(range).is_empty()
    }

    /// Yields all the provenances stored in this map.
    pub fn provenances(&self) -> impl Iterator<Item = Prov> + '_ {
        let bytes = self.bytes.iter().flat_map(|b| b.values());
        self.ptrs.values().chain(bytes).copied()
    }

    pub fn insert_ptr(&mut self, offset: Size, prov: Prov, cx: &impl HasDataLayout) {
        debug_assert!(self.range_empty(alloc_range(offset, cx.data_layout().pointer_size), cx));
        self.ptrs.insert(offset, prov);
    }

    /// Removes all provenance inside the given range.
    /// If there is provenance overlapping with the edges, this can fail with an error:
    /// a pointer can only be split into bytewise provenance when `Prov::OFFSET_IS_ADDR` is true.
    pub fn clear(&mut self, range: AllocRange, cx: &impl HasDataLayout) -> AllocResult {
        let start = range.start;
        let end = range.end();
        // Clear the bytewise part -- this is easy.
        if Prov::OFFSET_IS_ADDR {
            if let Some(bytes) = self.bytes.as_mut() {
                bytes.remove_range(start..end);
            }
        } else {
            debug_assert!(self.bytes.is_none());
        }

        // For the ptr-sized part, find the first (inclusive) and last (exclusive) byte of
        // provenance that overlaps with the given range.
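        // E.g., clearing 3..5 when an 8-byte pointer is stored at offset 0 yields `first == 0`
        // and `last == 8`; the edge bytes 0..3 and 5..8 are then preserved bytewise below.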
        let (first, last) = {
            // Find all provenance overlapping the given range.
            let provenance = self.range_get_ptrs(range, cx);
            if provenance.is_empty() {
                // No provenance in this range, we are done.
                return Ok(());
            }

            (
                provenance.first().unwrap().0,
                provenance.last().unwrap().0 + cx.data_layout().pointer_size,
            )
        };

        // We need to handle clearing the provenance from parts of a pointer.
        if first < start {
            if !Prov::OFFSET_IS_ADDR {
                // We can't split up the provenance into less than a pointer.
                return Err(AllocError::OverwritePartialPointer(first));
            }
            // Insert the remaining part in the bytewise provenance.
            let prov = self.ptrs[&first];
            let bytes = self.bytes.get_or_insert_with(Box::default);
            for offset in first..start {
                bytes.insert(offset, prov);
            }
        }
        if last > end {
            let begin_of_last = last - cx.data_layout().pointer_size;
            if !Prov::OFFSET_IS_ADDR {
                // We can't split up the provenance into less than a pointer.
                return Err(AllocError::OverwritePartialPointer(begin_of_last));
            }
            // Insert the remaining part in the bytewise provenance.
            let prov = self.ptrs[&begin_of_last];
            let bytes = self.bytes.get_or_insert_with(Box::default);
            for offset in end..last {
                bytes.insert(offset, prov);
            }
        }

        // Forget all the provenance.
        // Since provenance entries do not overlap, we know that removing until `last` (exclusive) is fine,
        // i.e., this will not remove any other provenance just after the ones we care about.
        self.ptrs.remove_range(first..last);

        Ok(())
    }
}

/// A partial, owned list of provenance to transfer into another allocation.
///
/// Offsets are already adjusted to the destination allocation.
pub struct ProvenanceCopy<Prov> {
    dest_ptrs: Option<Box<[(Size, Prov)]>>,
    dest_bytes: Option<Box<[(Size, Prov)]>>,
}

impl<Prov: Provenance> ProvenanceMap<Prov> {
    pub fn prepare_copy(
        &self,
        src: AllocRange,
        dest: Size,
        count: u64,
        cx: &impl HasDataLayout,
    ) -> AllocResult<ProvenanceCopy<Prov>> {
        let shift_offset = move |idx, offset| {
            // compute offset for current repetition
            let dest_offset = dest + src.size * idx; // `Size` operations
            // shift offsets from source allocation to destination allocation
            (offset - src.start) + dest_offset // `Size` operations
        };
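        // E.g., for `shift_offset` with `src == 4..20` and `dest == 100`: provenance at source
        // offset 6 lands at offset 102 in the first copy and, with `count == 2`, at 118 in the
        // second.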
        let ptr_size = cx.data_layout().pointer_size;

        // # Pointer-sized provenances
        // Get the provenances that are entirely within this range.
        // (Different from `range_get_ptrs` which asks if they overlap the range.)
        // Only makes sense if we are copying at least one pointer worth of bytes.
        let mut dest_ptrs_box = None;
        if src.size >= ptr_size {
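            // Entries at offsets up to and including `src.end() - ptr_size` fit entirely; e.g.,
            // with 8-byte pointers and `src == 0..16`, `adjusted_end == 9` covers offsets 0..=8.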
            let adjusted_end = Size::from_bytes(src.end().bytes() - (ptr_size.bytes() - 1));
            let ptrs = self.ptrs.range(src.start..adjusted_end);
            // If `count` is large, this is rather wasteful -- we are allocating a big array here, which
            // is mostly filled with redundant information since it's just N copies of the same `Prov`s
            // at slightly adjusted offsets. The reason we do this is so that in `mark_provenance_range`
            // we can use `insert_presorted`. That wouldn't work with an `Iterator` that just produces
            // the right sequence of provenance for all N copies.
            // Basically, this large array would have to be created anyway in the target allocation.
            let mut dest_ptrs = Vec::with_capacity(ptrs.len() * (count as usize));
            for i in 0..count {
                dest_ptrs
                    .extend(ptrs.iter().map(|&(offset, reloc)| (shift_offset(i, offset), reloc)));
            }
            debug_assert_eq!(dest_ptrs.len(), dest_ptrs.capacity());
            dest_ptrs_box = Some(dest_ptrs.into_boxed_slice());
        }

        // # Byte-sized provenances
        // This includes the existing bytewise provenance in the range, and ptr provenance
        // that overlaps with the begin/end of the range.
        let mut dest_bytes_box = None;
        let begin_overlap = self.range_get_ptrs(alloc_range(src.start, Size::ZERO), cx).first();
        let end_overlap = self.range_get_ptrs(alloc_range(src.end(), Size::ZERO), cx).first();
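        // (A zero-length range makes `range_get_ptrs` return exactly the pointer crossing that
        // edge, if any; see its doc comment.)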
        if !Prov::OFFSET_IS_ADDR {
            // There can't be any bytewise provenance, and we cannot split up the begin/end overlap.
            if let Some(entry) = begin_overlap {
                return Err(AllocError::ReadPartialPointer(entry.0));
            }
            if let Some(entry) = end_overlap {
                return Err(AllocError::ReadPartialPointer(entry.0));
            }
            debug_assert!(self.bytes.is_none());
        } else {
            let mut bytes = Vec::new();
            // First, if there is a part of a pointer at the start, add that.
            if let Some(entry) = begin_overlap {
                trace!("start overlapping entry: {entry:?}");
                // For really small copies, make sure we don't run off the end of the `src` range.
                let entry_end = cmp::min(entry.0 + ptr_size, src.end());
                for offset in src.start..entry_end {
                    bytes.push((offset, entry.1));
                }
            } else {
                trace!("no start overlapping entry");
            }
            // Then the main part, bytewise provenance from `self.bytes`.
            if let Some(all_bytes) = self.bytes.as_ref() {
                bytes.extend(all_bytes.range(src.start..src.end()));
            }
            // And finally possibly parts of a pointer at the end.
            if let Some(entry) = end_overlap {
                trace!("end overlapping entry: {entry:?}");
                // For really small copies, make sure we don't start before `src` does.
                let entry_start = cmp::max(entry.0, src.start);
                for offset in entry_start..src.end() {
                    if bytes.last().map_or(true, |bytes_entry| bytes_entry.0 < offset) {
                        // The last entry, if it exists, has a lower offset than us.
                        bytes.push((offset, entry.1));
                    } else {
                        // There already is an entry for this offset in there! This can happen when the
                        // start and end range checks actually end up hitting the same pointer, so we
                        // already added this in the "pointer at the start" part above.
                        assert!(entry.0 <= src.start);
                    }
                }
            } else {
                trace!("no end overlapping entry");
            }
            trace!("byte provenances: {bytes:?}");

            // And again a buffer for the new list on the target side.
            let mut dest_bytes = Vec::with_capacity(bytes.len() * (count as usize));
            for i in 0..count {
                dest_bytes
                    .extend(bytes.iter().map(|&(offset, reloc)| (shift_offset(i, offset), reloc)));
            }
            debug_assert_eq!(dest_bytes.len(), dest_bytes.capacity());
            dest_bytes_box = Some(dest_bytes.into_boxed_slice());
        }

        Ok(ProvenanceCopy { dest_ptrs: dest_ptrs_box, dest_bytes: dest_bytes_box })
    }

    /// Applies a provenance copy.
    /// The affected range, as defined in the parameters to `prepare_copy`, is expected
    /// to be clear of provenance.
    pub fn apply_copy(&mut self, copy: ProvenanceCopy<Prov>) {
        if let Some(dest_ptrs) = copy.dest_ptrs {
            self.ptrs.insert_presorted(dest_ptrs.into());
        }
        if Prov::OFFSET_IS_ADDR {
            if let Some(dest_bytes) = copy.dest_bytes && !dest_bytes.is_empty() {
                self.bytes.get_or_insert_with(Box::default).insert_presorted(dest_bytes.into());
            }
        } else {
            debug_assert!(copy.dest_bytes.is_none());
        }
    }
}
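
// What follows is an illustrative, self-contained sketch, not part of the real API: it mimics
// the edge-splitting behavior of `clear` with toy types (`u64` offsets, `u32` provenance,
// std-only maps) so the interplay of ptr-sized and bytewise entries can be exercised without
// the interpreter machinery. It models the `Prov::OFFSET_IS_ADDR == true` case, where splitting
// a pointer into bytewise provenance is allowed.
#[cfg(test)]
mod toy_model_tests {
    use std::collections::BTreeMap;

    const PTR_SIZE: u64 = 8;

    /// Toy stand-in for `ProvenanceMap`: entries in `ptrs` cover `PTR_SIZE` bytes each,
    /// entries in `bytes` cover a single byte.
    struct ToyMap {
        ptrs: BTreeMap<u64, u32>,
        bytes: BTreeMap<u64, u32>,
    }

    impl ToyMap {
        /// Clears `start..end`, turning the parts of partially overlapped pointers that lie
        /// outside the range into bytewise entries, like `ProvenanceMap::clear` does.
        fn clear(&mut self, start: u64, end: u64) {
            // Clear the bytewise part.
            self.bytes.retain(|&offset, _| offset < start || offset >= end);
            // Find ptr entries overlapping the range, with the same look-back as
            // `range_get_ptrs`.
            let lookback = start.saturating_sub(PTR_SIZE - 1);
            let overlapping: Vec<(u64, u32)> =
                self.ptrs.range(lookback..end).map(|(&o, &p)| (o, p)).collect();
            for (offset, prov) in overlapping {
                // Preserve the bytes of this pointer outside `start..end` bytewise.
                for byte in offset..offset + PTR_SIZE {
                    if byte < start || byte >= end {
                        self.bytes.insert(byte, prov);
                    }
                }
                self.ptrs.remove(&offset);
            }
        }
    }

    #[test]
    fn clear_splits_pointer_edges() {
        let mut map = ToyMap { ptrs: BTreeMap::from([(0, 42)]), bytes: BTreeMap::new() };
        map.clear(3, 5);
        // The ptr-sized entry is gone; bytes 0..3 and 5..8 keep its provenance bytewise.
        assert!(map.ptrs.is_empty());
        assert_eq!(map.bytes.keys().copied().collect::<Vec<_>>(), vec![0, 1, 2, 5, 6, 7]);
    }
}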