1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/// A scanner over a caller-provided byte buffer that hands out one
/// whitespace-delimited word, or one `."` string literal, at a time.
///
/// The three pointers all refer to the single region passed to
/// [`WordStrBuf::new`].
pub struct WordStrBuf {
    /// First byte of the buffer.
    start: *mut u8,
    /// Scan position: the next byte to examine. Equal to `end` once the
    /// buffer has been fully consumed (and before the first `fill`).
    cur: *mut u8,
    /// One past the last byte of the buffer.
    end: *mut u8,
    /// The word or string literal produced by the most recent advance, if any.
    holding: Holding,
}

/// What the most recent scan of a [`WordStrBuf`] produced.
///
/// Each payload is a `(pointer to first byte, length in bytes)` pair.
enum Holding {
    /// Nothing is currently held.
    None,
    /// A whitespace-delimited word.
    Word((*mut u8, usize)),
    /// The body of a string literal, not including the closing `"`.
    Str((*mut u8, usize)),
}

/// Errors returned by [`WordStrBuf::fill`].
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum FillError {
    /// The [`WordStrBuf`] does not have sufficient capacity for the provided
    /// input.
    ///
    /// The payload is the buffer's total capacity in bytes.
    NoCapacity(usize),
    /// The input string contains non-ASCII characters.
    NotAscii,
}

/// Errors returned by [`WordStrBuf::advance_str`] indicating that an invalid
/// string literal was found.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum StrLiteralError {
    /// The current word is not the beginning of a string literal (`."`).
    NotAStr,
    /// The `."` was not followed by anything: only whitespace remained before
    /// the end of the buffer.
    Empty,
    /// The string literal was not terminated by a `"`: the end of the buffer
    /// was reached first.
    Unterminated,
}

impl WordStrBuf {
    /// Creates a scanner over the `size` bytes starting at `bottom`.
    ///
    /// The new buffer starts out exhausted (`cur == end`) and holding nothing,
    /// so [`advance`](Self::advance) finds no word until
    /// [`fill`](Self::fill) has been called.
    ///
    /// The other methods read and write `bottom..bottom + size` through raw
    /// pointers, so the caller must keep that region valid, and not otherwise
    /// accessed, for as long as the returned value is in use.
    pub fn new(bottom: *mut u8, size: usize) -> Self {
        let end = bottom.wrapping_add(size);
        debug_assert!(end >= bottom);
        Self {
            end,
            start: bottom,
            cur: end,
            holding: Holding::None,
        }
    }

    /// Total size of the underlying buffer, in bytes.
    #[inline]
    fn capacity(&self) -> usize {
        (self.end as usize) - (self.start as usize)
    }

    /// Replaces the buffer contents with `input` and rewinds the scan position
    /// to the start of the buffer.
    ///
    /// The input is ASCII-lowercased as it is copied, and the unused tail of
    /// the buffer is padded with spaces. On success any previously held word
    /// or string literal is discarded, because the bytes it referred to have
    /// been overwritten.
    ///
    /// # Errors
    ///
    /// * [`FillError::NoCapacity`] if `input` is longer than the buffer; the
    ///   payload is the buffer's capacity.
    /// * [`FillError::NotAscii`] if `input` contains a non-ASCII character.
    ///
    /// On error the buffer, the scan position and the held item are left
    /// untouched.
    pub fn fill(&mut self, input: &str) -> Result<(), FillError> {
        let cap = self.capacity();
        let ilen = input.len();
        if ilen > cap {
            return Err(FillError::NoCapacity(cap));
        }
        if !input.is_ascii() {
            // TODO: Do I care about this?
            return Err(FillError::NotAscii);
        }
        // TODO: I probably *don't* want to lowercase everything, this also affects
        // things like string literals, which don't need to be lowercased.
        //
        // SAFETY: `ilen <= cap`, so every offset written below lies within
        // `start..end`, the region handed to `new`.
        unsafe {
            for (offset, byte) in input.bytes().enumerate() {
                self.start.add(offset).write(byte.to_ascii_lowercase());
            }
            core::ptr::write_bytes(self.start.add(ilen), b' ', cap - ilen);
        }
        self.cur = self.start;
        // Whatever was held pointed into the old contents; returning it now
        // would expose an arbitrary slice of the new input.
        self.holding = Holding::None;
        Ok(())
    }

    // Move `self.cur` to the next non-whitespace character,
    // and return the value of `self.cur` after moving.
    //
    // Returns `None` if we hit the end.
    fn next_nonwhitespace(&mut self) -> Option<*mut u8> {
        loop {
            if self.cur == self.end {
                return None;
            }
            // SAFETY: `cur` is in `start..end` here (it is not `end`).
            if !unsafe { *self.cur }.is_ascii_whitespace() {
                return Some(self.cur);
            }
            self.cur = self.cur.wrapping_add(1);
        }
    }

    /// Scans forward to the next whitespace-delimited word and holds it.
    ///
    /// Afterwards [`cur_word`](Self::cur_word) returns that word, or `None`
    /// if only whitespace remained. Any previously held item is dropped
    /// either way.
    pub fn advance(&mut self) {
        self.holding = Holding::None;

        // Find the start, skipping any ASCII whitespace
        let start = match self.next_nonwhitespace() {
            Some(s) => s,
            None => return,
        };
        // Find the end, either the first ASCII whitespace, or the end of the buffer
        // This is ONE PAST the last character
        let end = loop {
            if self.cur == self.end {
                break self.end;
            }
            // SAFETY: `cur` is in `start..end` here (it is not `end`).
            if unsafe { *self.cur }.is_ascii_whitespace() {
                break self.cur;
            }
            self.cur = self.cur.wrapping_add(1);
        };
        let size = (end as usize) - (start as usize);
        self.holding = Holding::Word((start, size));
    }

    /// Scans the body of a string literal, given that the current word is `."`.
    ///
    /// Whitespace after the `."` is skipped, then everything up to (but not
    /// including) the next `"` becomes the held string literal, readable via
    /// [`cur_str_literal`](Self::cur_str_literal). The scan position is left
    /// just past the closing quote.
    ///
    /// # Errors
    ///
    /// * [`StrLiteralError::NotAStr`] if the current word is not `."`; the
    ///   held word is left as it was.
    /// * [`StrLiteralError::Empty`] if only whitespace follows the `."`.
    /// * [`StrLiteralError::Unterminated`] if the end of the buffer is reached
    ///   before a closing `"`.
    ///
    /// In the latter two cases nothing is held afterwards.
    pub fn advance_str(&mut self) -> Result<(), StrLiteralError> {
        if self.cur_word() != Some(r#".""#) {
            return Err(StrLiteralError::NotAStr);
        }
        self.holding = Holding::None;

        let start = match self.next_nonwhitespace() {
            Some(s) => s,
            None => return Err(StrLiteralError::Empty),
        };

        let end = loop {
            if self.cur == self.end {
                return Err(StrLiteralError::Unterminated);
            }
            // SAFETY: `cur` is in `start..end` here (it is not `end`).
            if unsafe { *self.cur } == b'"' {
                // Move past the quote by one. Okay if this is now END.
                let pre_quote = self.cur;
                self.cur = self.cur.wrapping_add(1);
                break pre_quote;
            }
            self.cur = self.cur.wrapping_add(1);
        };

        let size = (end as usize) - (start as usize);
        self.holding = Holding::Str((start, size));
        Ok(())
    }

    /// Returns the held string literal, or `None` if a word or nothing is held.
    pub fn cur_str_literal(&self) -> Option<&str> {
        match &self.holding {
            Holding::None => None,
            // SAFETY: a held range always lies inside the buffer and was
            // scanned from bytes written by `fill`, which only accepts ASCII.
            Holding::Str((start, len)) => Some(unsafe {
                let u8_sli = core::slice::from_raw_parts(*start, *len);
                core::str::from_utf8_unchecked(u8_sli)
            }),
            Holding::Word(_) => None,
        }
    }

    /// Returns the held word, or `None` if a string literal or nothing is held.
    pub fn cur_word(&self) -> Option<&str> {
        match &self.holding {
            Holding::None => None,
            // SAFETY: a held range always lies inside the buffer and was
            // scanned from bytes written by `fill`, which only accepts ASCII.
            Holding::Word((start, len)) => Some(unsafe {
                let u8_sli = core::slice::from_raw_parts(*start, *len);
                core::str::from_utf8_unchecked(u8_sli)
            }),
            Holding::Str(_) => None,
        }
    }
}