StatProfilerHTML.jl report
Generated on tor 10 okt 2019 11:38:33
File source code
Line Exclusive Inclusive Code
1 # This file is a part of Julia. License is MIT: https://julialang.org/license
2
"""
    StringIndexError(str, i)

Exception raised when `str` is accessed at index `i` and `i` is not a valid
index into it.
"""
struct StringIndexError <: Exception
    string::AbstractString
    index::Integer
end

# Throwing helper, marked `@noinline` so the throw stays out of line in callers.
# The index is converted to `Int` before it is stored in the exception.
@noinline function string_index_err(s::AbstractString, i::Integer)
    throw(StringIndexError(s, Int(i)))
end
14
const ByteArray = Union{Vector{UInt8},Vector{Int8}}

# Closed-interval membership test `lo ≤ b ≤ hi` for integers of one common type.
# Uses the non-short-circuiting `&`, so both comparisons are always evaluated.
@inline function between(b::T, lo::T, hi::T) where {T<:Integer}
    return (b ≥ lo) & (hi ≥ b)
end
18
19 ## constructors and conversions ##
20
# The docstring below documents the `String` constructor from boot.jl. It lives
# here as a workaround for #16730 and because `@doc` is unavailable in boot.jl.
"""
    String(v::AbstractVector{UInt8})

Construct a `String` from the byte vector `v`, whose contents are taken to be
UTF-8 encoded characters. When `v` is a `Vector{UInt8}` it is truncated to zero
length, so later modification of `v` cannot change the resulting string. Pass
`String(copy(v))` to keep `v` intact.

The memory of `v` is reused without copying whenever possible. This is
guaranteed for byte vectors returned by [`take!`](@ref) on a writable
[`IOBuffer`](@ref) and by calls to [`read(io, nb)`](@ref), which allows
zero-copy conversion of I/O data to strings. In other cases the `Vector{UInt8}`
data may be copied, but `v` is still truncated so that behavior is consistent.
"""
function String(v::AbstractVector{UInt8})
    # Copy the bytes into a fresh string-backed vector, then convert that.
    buf = StringVector(length(v))
    return String(copyto!(buf, v))
end
39 1 (2.38%) 1 (2.38%)
1 (2.38%) samples spent in Type
1 (100.00%) (ex.), 1 (100.00%) (incl.) when called from print_to_string line 124
# Conversion of a `Vector{UInt8}` is delegated to the runtime's `jl_array_to_string`.
function String(v::Vector{UInt8})
    return ccall(:jl_array_to_string, Ref{String}, (Any,), v)
end
40
"""
    unsafe_string(p::Ptr{UInt8}, [length::Integer])

Copy a string out of the C-style (NUL-terminated), UTF-8 encoded string located
at address `p`. (The pointer may safely be freed afterwards.) When `length`
(the size of the data in bytes) is given, the data need not be NUL-terminated.

The function is called "unsafe" because it will crash if `p` is not a valid
memory address to data of the requested length.
"""
function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer)
    if p == C_NULL
        throw(ArgumentError("cannot convert NULL to string"))
    end
    return ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), p, len)
end
function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
    if p == C_NULL
        throw(ArgumentError("cannot convert NULL to string"))
    end
    return ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
end
59
60 5 (11.90%) 5 (11.90%)
5 (11.90%) samples spent in _string_n
5 (100.00%) (ex.), 5 (100.00%) (incl.) when called from StringVector line 31
# Ask the runtime (`jl_alloc_string`) for a new `String` of `n` bytes.
function _string_n(n::Integer)
    return ccall(:jl_alloc_string, Ref{String}, (Csize_t,), n)
end
61
"""
    String(s::AbstractString)

Convert the string `s` to a contiguous byte array representation encoded as
UTF-8 bytes. Such a representation is often suitable for passing strings to C.
"""
function String(s::AbstractString)
    return print_to_string(s)
end

# A `Symbol` is converted by copying the bytes its name pointer refers to.
function String(s::Symbol)
    name_ptr = unsafe_convert(Ptr{UInt8}, s)
    return unsafe_string(name_ptr)
end
70
71 6 (14.29%) 6 (14.29%)
6 (14.29%) samples spent in StringVector
6 (100.00%) (ex.), 6 (100.00%) (incl.) when called from #IOBuffer#302 line 114
# Obtain a `Vector{UInt8}` for `s` through the runtime's `jl_string_to_array`.
function unsafe_wrap(::Type{Vector{UInt8}}, s::String)
    return ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
end
72
# Materialize the code units of a `String` as a freshly allocated byte vector.
function (::Type{Vector{UInt8}})(s::CodeUnits{UInt8,String})
    bytes = Vector{UInt8}(undef, length(s))
    return copyto!(bytes, s)
end

# Converting a code-unit wrapper back to `String` returns the wrapped string itself.
function String(s::CodeUnits{UInt8,String})
    return s.s
end
76
77 ## low-level functions ##
78
# Address of the first byte of `s`.
function pointer(s::String)
    return unsafe_convert(Ptr{UInt8}, s)
end

# Address of byte `i` of `s` (1-based); no bounds check is performed here.
function pointer(s::String, i::Integer)
    return pointer(s) + (i - 1)
end

# Number of code units and size in bytes are both `Core.sizeof(s)`.
function ncodeunits(s::String)
    return Core.sizeof(s)
end
function sizeof(s::String)
    return Core.sizeof(s)
end

# The code unit type of `String` is `UInt8`.
function codeunit(s::String)
    return UInt8
end

# Load code unit `i` of `s`. The bounds check can be elided by `@inbounds` in
# the caller; `s` is kept alive while its raw pointer is dereferenced.
@inline function codeunit(s::String, i::Integer)
    @boundscheck checkbounds(s, i)
    byte = GC.@preserve s unsafe_load(pointer(s, i))
    return byte
end
90
91 ## comparison ##
92
# Three-way comparison of two strings by their bytes: `memcmp` over the common
# prefix decides, and a tie there is broken by comparing the byte lengths.
function cmp(a::String, b::String)
    na = sizeof(a)
    nb = sizeof(b)
    order = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
                  a, b, min(na, nb))
    if order < 0
        return -1
    elseif order > 0
        return +1
    else
        return cmp(na, nb)
    end
end
99
# Two strings are equal when they have the same byte length and `memcmp`
# reports no difference over that length.
function ==(a::String, b::String)
    nbytes = sizeof(a)
    if nbytes != sizeof(b)
        return false
    end
    return 0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, nbytes)
end
104
# The minimum of the `String` type is the empty string.
function typemin(::Type{String})
    return ""
end

# For a string value, defer to the method for the `String` type.
function typemin(::String)
    return typemin(String)
end
107
108 ## thisind, nextind ##
109
110 1 (2.38%) 1 (2.38%)
1 (2.38%) samples spent in lastindex
1 (100.00%) (ex.), 1 (100.00%) (incl.) when called from tostr_sizehint line 107
# `thisind` for `String` forwards to the shared `_thisind_str` implementation.
function thisind(s::String, i::Int)
    return _thisind_str(s, i)
end
111
112 # s should be String or SubString{String}
113
1 (2.38%) samples spent in _thisind_str
1 (100.00%) (ex.), 1 (100.00%) (incl.) when called from print_to_string line 117
# Return the index at which the character containing code unit `i` starts.
# `0` and `ncodeunits(s) + 1` are returned unchanged; any other index outside
# `1:ncodeunits(s)` throws a `BoundsError`. Up to three preceding code units
# are inspected. If no suitable lead byte is found, `i` itself is returned.
function _thisind_str(s, i::Int)
    if i == 0
        return 0
    end
    len = ncodeunits(s)
    if i == len + 1
        return i
    end
    @boundscheck between(i, 1, len) || throw(BoundsError(s, i))
    @inbounds byte = codeunit(s, i)
    # Not a continuation byte (10xxxxxx), or nothing precedes it: stay at `i`.
    if !((byte & 0xc0 == 0x80) & (i - 1 > 0))
        return i
    end
    # One step back: any multi-byte lead byte (0xc0..0xf7) owns code unit `i`.
    @inbounds byte = codeunit(s, i - 1)
    if between(byte, 0b11000000, 0b11110111)
        return i - 1
    end
    if !((byte & 0xc0 == 0x80) & (i - 2 > 0))
        return i
    end
    # Two steps back: only lead bytes in 0xe0..0xf7 reach this far.
    @inbounds byte = codeunit(s, i - 2)
    if between(byte, 0b11100000, 0b11110111)
        return i - 2
    end
    if !((byte & 0xc0 == 0x80) & (i - 3 > 0))
        return i
    end
    # Three steps back: only lead bytes in 0xf0..0xf7 reach this far.
    @inbounds byte = codeunit(s, i - 3)
    if between(byte, 0b11110000, 0b11110111)
        return i - 3
    end
    return i
end
130
# `nextind` for `String` forwards to the shared `_nextind_str` implementation.
function nextind(s::String, i::Int)
    return _nextind_str(s, i)
end
132
133 # s should be String or SubString{String}
# Return the index following the character that contains code unit `i`.
# `i == 0` yields `1`; any other index outside `1:ncodeunits(s)` throws a
# `BoundsError`.
function _nextind_str(s, i::Int)
    if i == 0
        return 1
    end
    len = ncodeunits(s)
    @boundscheck between(i, 1, len) || throw(BoundsError(s, i))
    @inbounds lead = codeunit(s, i)
    # Bytes below 0x80 or at/above 0xf8 occupy exactly one code unit.
    if (lead < 0x80) | (0xf8 ≤ lead)
        return i + 1
    end
    if lead < 0xc0
        # `i` points at a continuation byte: find the start of its character
        # and advance from there, unless the byte stands on its own.
        start = thisind(s, i)
        return start < i ? nextind(s, start) : i + 1
    end
    # first continuation byte
    i += 1
    if i > len
        return i
    end
    @inbounds byte = codeunit(s, i)
    if byte & 0xc0 ≠ 0x80
        return i
    end
    i += 1
    if (i > len) | (lead < 0xe0)
        return i
    end
    # second continuation byte
    @inbounds byte = codeunit(s, i)
    if byte & 0xc0 ≠ 0x80
        return i
    end
    i += 1
    if (i > len) | (lead < 0xf0)
        return i
    end
    # third continuation byte
    @inbounds byte = codeunit(s, i)
    return ifelse(byte & 0xc0 ≠ 0x80, i, i + 1)
end
157
158 ## checking UTF-8 & ASCII validity ##
159
# Classify a byte sequence via the runtime's `u8_isvalid`. Result codes:
#   0: neither valid ASCII nor UTF-8
#   1: valid ASCII
#   2: valid UTF-8
function byte_string_classify(data::Vector{UInt8})
    nbytes = length(data)
    return ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), data, nbytes)
end
function byte_string_classify(s::String)
    nbytes = sizeof(s)
    return ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, nbytes)
end
167
# Bytes form a valid `String` when the classifier reports ASCII or UTF-8 (non-zero).
function isvalid(::Type{String}, s::Union{Vector{UInt8},String})
    return byte_string_classify(s) ≠ 0
end

# Validity of an existing `String` is the same check applied to itself.
function isvalid(s::String)
    return isvalid(String, s)
end
170
# True when the top two bits of `c` are `10`, the UTF-8 continuation-byte pattern.
function is_valid_continuation(c)
    return (c & 0xc0) == 0x80
end
172
173 ## required core functionality ##
174
# Iteration protocol for `String`: returns `(char, next_index)`, or `nothing`
# once `i` is past the last code unit. Bytes outside 0x80..0xf7 are returned
# directly as one-code-unit characters; the rest go to `next_continued`.
@propagate_inbounds function iterate(s::String, i::Int=firstindex(s))
    if i > ncodeunits(s)
        return nothing
    end
    byte = codeunit(s, i)
    # A `Char` keeps its first byte in the top 8 bits of its 32-bit payload.
    bits = UInt32(byte) << 24
    if !between(byte, 0x80, 0xf7)
        return reinterpret(Char, bits), i + 1
    end
    return next_continued(s, i, bits)
end
182
# Slow path of `iterate`: starting from first byte `u >> 24` at index `i`,
# gather up to three continuation bytes into `u` and return the resulting
# `Char` together with the index of the next code unit to read.
function next_continued(s::String, i::Int, u::UInt32)
    if u < 0xc0000000
        # The first byte is itself a continuation byte: consume only it.
        i += 1
        @goto ret
    end
    len = ncodeunits(s)
    # first continuation byte
    i += 1
    if i > len
        @goto ret
    end
    @inbounds byte = codeunit(s, i)
    if !(byte & 0xc0 == 0x80)
        @goto ret
    end
    u |= UInt32(byte) << 16
    # second continuation byte
    i += 1
    if (i > len) | (u < 0xe0000000)
        @goto ret
    end
    @inbounds byte = codeunit(s, i)
    if !(byte & 0xc0 == 0x80)
        @goto ret
    end
    u |= UInt32(byte) << 8
    # third continuation byte
    i += 1
    if (i > len) | (u < 0xf0000000)
        @goto ret
    end
    @inbounds byte = codeunit(s, i)
    if !(byte & 0xc0 == 0x80)
        @goto ret
    end
    u |= UInt32(byte)
    i += 1
@label ret
    return reinterpret(Char, u), i
end
204
# Character at code-unit index `i`. Bytes outside 0x80..0xf7 are returned
# directly as one-code-unit characters; the rest go to `getindex_continued`.
@propagate_inbounds function getindex(s::String, i::Int)
    byte = codeunit(s, i)
    # A `Char` keeps its first byte in the top 8 bits of its 32-bit payload.
    bits = UInt32(byte) << 24
    if !between(byte, 0x80, 0xf7)
        return reinterpret(Char, bits)
    end
    return getindex_continued(s, i, bits)
end
211
# Slow path of `getindex`: starting from first byte `u >> 24` at index `i`,
# gather up to three continuation bytes into `u` and return the resulting
# `Char`. A continuation byte at `i` that is not a valid character index
# raises a `StringIndexError`.
function getindex_continued(s::String, i::Int, u::UInt32)
    if u < 0xc0000000
        # called from `getindex` which checks bounds
        @inbounds valid = isvalid(s, i)
        if valid
            @goto ret
        end
        string_index_err(s, i)
    end
    len = ncodeunits(s)

    i += 1
    if i > len
        @goto ret
    end
    @inbounds byte = codeunit(s, i) # cont byte 1
    if !(byte & 0xc0 == 0x80)
        @goto ret
    end
    u |= UInt32(byte) << 16

    i += 1
    if (i > len) | (u < 0xe0000000)
        @goto ret
    end
    @inbounds byte = codeunit(s, i) # cont byte 2
    if !(byte & 0xc0 == 0x80)
        @goto ret
    end
    u |= UInt32(byte) << 8

    i += 1
    if (i > len) | (u < 0xf0000000)
        @goto ret
    end
    @inbounds byte = codeunit(s, i) # cont byte 3
    if !(byte & 0xc0 == 0x80)
        @goto ret
    end
    u |= UInt32(byte)
@label ret
    return reinterpret(Char, u)
end
237
# Ranges of any integer type are converted to `UnitRange{Int}` and re-indexed.
function getindex(s::String, r::UnitRange{<:Integer})
    lo = Int(first(r))
    hi = Int(last(r))
    return s[lo:hi]
end
239
# Substring of `s` covering the characters whose start indices lie in `r`.
#
# An empty range yields "". Otherwise, unless bounds checking is elided by the
# caller, both ends of `r` must be in bounds and must be valid character
# indices; an invalid one raises a `StringIndexError`. The result is a new
# `String`, not a view.
function getindex(s::String, r::UnitRange{Int})
    isempty(r) && return ""
    i, j = first(r), last(r)
    @boundscheck begin
        checkbounds(s, r)
        @inbounds isvalid(s, i) || string_index_err(s, i)
        @inbounds isvalid(s, j) || string_index_err(s, j)
    end
    # Extend `j` to the last code unit of the character that starts at `j`.
    j = nextind(s, j) - 1
    n = j - i + 1
    ss = _string_n(n)
    # Copy the bytes in one block. Both strings are kept alive while their raw
    # pointers are in use, matching how `codeunit` guards its pointer access.
    GC.@preserve s ss unsafe_copyto!(pointer(ss), pointer(s, i), n)
    return ss
end
257
# Number of characters of `s` between code-unit indices `i` and `j`.
# Unless bounds checking is elided, `i` must lie in `1:ncodeunits(s)+1` and `j`
# in `0:ncodeunits(s)`; otherwise a `BoundsError` is thrown.
function length(s::String, i::Int, j::Int)
    @boundscheck begin
        limit = ncodeunits(s) + 1
        (0 < i ≤ limit) || throw(BoundsError(s, i))
        (0 ≤ j < limit) || throw(BoundsError(s, j))
    end
    if j < i
        return 0
    end
    # Move `i` back to the start of its character, remembering where it was.
    requested = i
    @inbounds i = thisind(s, requested)
    # Initial count: one per code unit from `i` to `j`, less one when `i` had
    # to be moved. The 4-argument method then subtracts continuation bytes.
    c = j - i + (i == requested)
    return length(s, i, j, c)
end
268
# Character count of the whole string: start from the code-unit count and let
# the 4-argument method subtract continuation bytes.
function length(s::String)
    nunits = ncodeunits(s)
    return length(s, 1, nunits, nunits)
end
270
# Counting kernel: starting from the initial count `c`, scan code units `i`
# through `n` and subtract one for every continuation byte that follows a
# multi-byte lead byte (0xc0..0xf7). Returns the adjusted count.
@inline function length(s::String, i::Int, n::Int, c::Int)
    if !(i < n)
        return c
    end
    @inbounds byte = codeunit(s, i)
    @inbounds while true
        # Skip ahead until `byte` holds a multi-byte lead byte.
        while true
            i += 1
            if !(i ≤ n)
                return c
            end
            if 0xc0 ≤ byte ≤ 0xf7
                break
            end
            byte = codeunit(s, i)
        end
        lead = byte
        byte = codeunit(s, i) # cont byte 1
        is_cont = byte & 0xc0 == 0x80
        c -= is_cont
        if !(is_cont & (lead ≥ 0xe0))
            continue
        end

        i += 1
        if !(i ≤ n)
            return c
        end
        byte = codeunit(s, i) # cont byte 2
        is_cont = byte & 0xc0 == 0x80
        c -= is_cont
        if !(is_cont & (lead ≥ 0xf0))
            continue
        end

        i += 1
        if !(i ≤ n)
            return c
        end
        byte = codeunit(s, i) # cont byte 3
        c -= (byte & 0xc0 == 0x80)
    end
end
295
# TODO: delete or move to char.jl
# First byte of `c`: the top 8 bits of its 32-bit representation.
function first_utf8_byte(c::Char)
    bits = reinterpret(UInt32, c)
    return (bits >> 24) % UInt8
end
298
299 ## overload methods for efficiency ##
300
# `i` is a valid character index of `s` when it is in bounds and is itself the
# start of the character that contains it.
function isvalid(s::String, i::Int)
    if !checkbounds(Bool, s, i)
        return false
    end
    return thisind(s, i) == i
end
302
# UTF-8 encoding length of a character
# TODO: delete or move to char.jl
# The length is 4 minus the number of all-zero trailing bytes in the 32-bit
# representation; forcing the top byte to 0xff caps that at 3, so the result
# is always in 1:4.
function codelen(c::Char)
    bits = 0xff000000 | reinterpret(UInt32, c)
    zero_bytes = trailing_zeros(bits) >> 3
    return 4 - zero_bytes
end
306
"""
    repeat(c::AbstractChar, r::Integer) -> String

Repeat a character `r` times. This can equivalently be accomplished by calling [`c^r`](@ref ^).

Returns the empty string for `r == 0` and throws an `ArgumentError` for
negative `r`.

# Examples
```jldoctest
julia> repeat('A', 3)
"AAA"
```
"""
repeat(c::AbstractChar, r::Integer) = repeat(Char(c), r) # fallback
function repeat(c::Char, r::Integer)
    r == 0 && return ""
    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
    # Byte-swap so the first encoded byte of `c` ends up in the low 8 bits.
    u = bswap(reinterpret(UInt32, c))
    # Number of bytes in the encoding of `c` (1 to 4).
    n = 4 - (leading_zeros(u | 0xff) >> 3)
    s = _string_n(n*r)
    p = pointer(s)
    # Keep `s` alive for as long as raw pointers into it are being written.
    GC.@preserve s if n == 1
        ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), p, u % UInt8, r)
    elseif n == 2
        p16 = reinterpret(Ptr{UInt16}, p)
        for i = 1:r
            unsafe_store!(p16, u % UInt16, i)
        end
    elseif n == 3
        b1 = (u >> 0) % UInt8
        b2 = (u >> 8) % UInt8
        b3 = (u >> 16) % UInt8
        for i = 0:r-1
            unsafe_store!(p, b1, 3i + 1)
            unsafe_store!(p, b2, 3i + 2)
            unsafe_store!(p, b3, 3i + 3)
        end
    elseif n == 4
        p32 = reinterpret(Ptr{UInt32}, p)
        for i = 1:r
            unsafe_store!(p32, u, i)
        end
    end
    return s
end
349 end