|
1 | 1 | #' @include struct.R
|
2 | 2 | NULL
|
3 | 3 |
|
| 4 | +#' @importFrom curl curl_unescape |
| 5 | + |
4 | 6 | # A Url object stores a parse URL.
|
5 | 7 | Url <- struct(
|
6 | 8 | scheme = "",
|
@@ -109,7 +111,6 @@ query_escape <- function(string) {
|
109 | 111 | # Escape strings so they can be safely included in a URL.
|
110 | 112 | escape <- function(string, mode){
|
111 | 113 | # base characters that won't be encoded
|
112 |
| - base_url_encode <- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._~-" |
113 | 114 | if (mode == "encodeHost" || mode == "encodeZone") {
|
114 | 115 | # host and zone characters that won't be encoded
|
115 | 116 | host_zone_pattern = "][!$&'()*+,;=:<>\""
|
@@ -140,71 +141,49 @@ escape <- function(string, mode){
|
140 | 141 | escape_string <- paws_url_encoder(string, paste0("[^", base_url_encode, "]"))
|
141 | 142 |
|
142 | 143 | # replace whitespace encoding from %20 to +
|
143 |
| - return(gsub("%20", "+", escape_string, fixed =T)) |
| 144 | + return(gsub("%20", "+", escape_string, fixed = TRUE)) |
144 | 145 | }
|
145 | 146 | if (mode == "encodeFragment") {
|
146 | 147 | return(paws_url_encoder(string, paste0("[^", pattern, "]")))
|
147 | 148 | }
|
148 |
| - return(utils::URLencode(string, reserved = TRUE)) |
| 149 | + return(paws_url_encoder(string, paste0("[^", base_url_encode, "]"))) |
| 150 | +} |
| 151 | + |
| 152 | +base_url_encode <- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._~-" |
| 153 | + |
| 154 | +is.ascii <- function(string){ |
| 155 | + return(string == iconv(string, "latin1", "ASCII", sub="")) |
149 | 156 | }
|
150 | 157 |
|
151 | 158 | # Escape characters given a pattern
|
152 | 159 | paws_url_encoder <- function(string, pattern){
|
153 | 160 | vapply(string, function(string){
|
154 | 161 | # split string out into individual characters
|
155 |
| - chars <- strsplit(string, "")[[1L]] |
| 162 | + chars <- strsplit(string, "", perl = TRUE)[[1L]] |
156 | 163 | # find characters that match pattern
|
157 |
| - found <- grep(pattern, chars) |
| 164 | + found <- grep(pattern, chars, perl = TRUE) |
158 | 165 | if (length(found)) {
|
159 |
| - # encode found characters only |
160 |
| - chars[found] <- toupper(paste0("%", charToRaw(string)[found])) |
| 166 | + # check if string is ascii |
| 167 | + if(is.ascii(string)){ |
| 168 | + # encode found characters only |
| 169 | + chars[found] <- toupper(paste0("%", charToRaw(string)[found])) |
| 170 | + } else { |
| 171 | + # group encoded part of non-ascii character: |
| 172 | + # e.g. "界" -> "%E7%95%8C" |
| 173 | + chars[found] <- vapply(chars[found], function(char) { |
| 174 | + toupper(paste0("%", charToRaw(char), collapse = "")) |
| 175 | + }, character(1)) |
| 176 | + } |
161 | 177 | }
|
162 | 178 | # rebuild string with encoded characters
|
163 | 179 | paste(chars, collapse = "")
|
164 | 180 | }, character(1), USE.NAMES = FALSE)
|
165 | 181 | }
|
166 | 182 |
|
167 |
| -# decode encoded url strings |
168 |
| -paws_url_decoder <- function(URL) { |
169 |
| - vapply(URL, function(url){ |
170 |
| - # split string into separate characters |
171 |
| - chars <- strsplit(url, "")[[1]] |
172 |
| - |
173 |
| - # locate % position |
174 |
| - found <- grep("%", chars, fixed = TRUE) |
175 |
| - |
176 |
| - if (length(found)) { |
177 |
| - start <- found + 1 |
178 |
| - end <- found + 2 |
179 |
| - |
180 |
| - # get raw vector of encoded parts (character form) |
181 |
| - # for example: "%20" -> "20" |
182 |
| - encoded <- vapply(seq_along(start), function(i) { |
183 |
| - paste0(chars[start[i]:end[i]], collapse = "") |
184 |
| - }, FUN.VALUE = character(1) |
185 |
| - ) |
186 |
| - # remove encoded parts from chars |
187 |
| - rm <- c(start, end) |
188 |
| - |
189 |
| - # update character % position |
190 |
| - found <- grep("%", chars[-rm], fixed = TRUE) |
191 |
| - |
192 |
| - # convert split url to raw |
193 |
| - char_raw <- charToRaw(paste(chars[-rm], collapse="")) |
194 |
| - |
195 |
| - # replace character % with decoded parts |
196 |
| - char_raw[found] <- as.raw(as.hexmode(encoded)) |
197 |
| - |
198 |
| - return(rawToChar(char_raw)) |
199 |
| - } |
200 |
| - return(url) |
201 |
| - }, character(1), USE.NAMES = FALSE) |
202 |
| -} |
203 |
| - |
204 | 183 | # Un-escape a string.
|
205 | 184 | # TODO: Complete.
|
206 | 185 | unescape <- function(string) {
|
207 |
| - return(paws_url_decoder(string)) |
| 186 | + return(curl_unescape(string)) |
208 | 187 | }
|
209 | 188 |
|
210 | 189 | # The inverse of query_escape: convert the encoded string back to the original,
|
|
0 commit comments