Skip to content

Commit ef4e566

Browse files
author
Larefly
authored
set perl parameter to TRUE
2 parents 01a9200 + 4291338 commit ef4e566

File tree

5 files changed

+43
-50
lines changed

5 files changed

+43
-50
lines changed

paws.common/DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ License: Apache License (>= 2.0)
1717
Encoding: UTF-8
1818
Imports:
1919
base64enc,
20+
curl,
2021
digest,
2122
httr,
2223
jsonlite,

paws.common/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ export(tag_get)
2424
export(tag_get_all)
2525
export(tag_has)
2626
export(type)
27+
importFrom(curl,curl_unescape)

paws.common/R/handlers_rest.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#' @include url.R
2+
13
# Build a request for the REST protocol.
24
rest_build <- function(request) {
35
if (params_filled(request)) {
@@ -227,10 +229,10 @@ clean_path <- function(url) {
227229

228230
# Return a string with special characters escaped, e.g. " " -> "%20".
229231
escape_path <- function(string, encode_sep) {
230-
path <- utils::URLencode(string, TRUE)
231-
if (!encode_sep) {
232-
path <- gsub("%2F", "/", path)
232+
if(!encode_sep){
233+
base_url_encode <- paste0("/", base_url_encode)
233234
}
235+
path <- paws_url_encoder(string, paste0("[^", base_url_encode, "]"))
234236
return(path)
235237
}
236238

paws.common/R/url.R

Lines changed: 24 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#' @include struct.R
22
NULL
33

4+
#' @importFrom curl curl_unescape
5+
46
# A Url object stores a parse URL.
57
Url <- struct(
68
scheme = "",
@@ -109,7 +111,6 @@ query_escape <- function(string) {
109111
# Escape strings so they can be safely included in a URL.
110112
escape <- function(string, mode){
111113
# base characters that won't be encoded
112-
base_url_encode <- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._~-"
113114
if (mode == "encodeHost" || mode == "encodeZone") {
114115
# host and zone characters that won't be encoded
115116
host_zone_pattern = "][!$&'()*+,;=:<>\""
@@ -140,71 +141,49 @@ escape <- function(string, mode){
140141
escape_string <- paws_url_encoder(string, paste0("[^", base_url_encode, "]"))
141142

142143
# replace whitespace encoding from %20 to +
143-
return(gsub("%20", "+", escape_string, fixed =T))
144+
return(gsub("%20", "+", escape_string, fixed = TRUE))
144145
}
145146
if (mode == "encodeFragment") {
146147
return(paws_url_encoder(string, paste0("[^", pattern, "]")))
147148
}
148-
return(utils::URLencode(string, reserved = TRUE))
149+
return(paws_url_encoder(string, paste0("[^", base_url_encode, "]")))
150+
}
151+
152+
base_url_encode <- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._~-"
153+
154+
is.ascii <- function(string){
155+
return(string == iconv(string, "latin1", "ASCII", sub=""))
149156
}
150157

151158
# Escape characters given a pattern
152159
paws_url_encoder <- function(string, pattern){
153160
vapply(string, function(string){
154161
# split string out into individual characters
155-
chars <- strsplit(string, "")[[1L]]
162+
chars <- strsplit(string, "", perl = TRUE)[[1L]]
156163
# find characters that match pattern
157-
found <- grep(pattern, chars)
164+
found <- grep(pattern, chars, perl = TRUE)
158165
if (length(found)) {
159-
# encode found characters only
160-
chars[found] <- toupper(paste0("%", charToRaw(string)[found]))
166+
# check if string is ascii
167+
if(is.ascii(string)){
168+
# encode found characters only
169+
chars[found] <- toupper(paste0("%", charToRaw(string)[found]))
170+
} else {
171+
# group encoded part of non-ascii character:
172+
# e.g. "界" -> "%E7%95%8C"
173+
chars[found] <- vapply(chars[found], function(char) {
174+
toupper(paste0("%", charToRaw(char), collapse = ""))
175+
}, character(1))
176+
}
161177
}
162178
# rebuild string with encoded characters
163179
paste(chars, collapse = "")
164180
}, character(1), USE.NAMES = FALSE)
165181
}
166182

167-
# decode encoded url strings
168-
paws_url_decoder <- function(URL) {
169-
vapply(URL, function(url){
170-
# split string into separate characters
171-
chars <- strsplit(url, "")[[1]]
172-
173-
# locate % position
174-
found <- grep("%", chars, fixed = TRUE)
175-
176-
if (length(found)) {
177-
start <- found + 1
178-
end <- found + 2
179-
180-
# get raw vector of encoded parts (character form)
181-
# for example: "%20" -> "20"
182-
encoded <- vapply(seq_along(start), function(i) {
183-
paste0(chars[start[i]:end[i]], collapse = "")
184-
}, FUN.VALUE = character(1)
185-
)
186-
# remove encoded parts from chars
187-
rm <- c(start, end)
188-
189-
# update character % position
190-
found <- grep("%", chars[-rm], fixed = TRUE)
191-
192-
# convert split url to raw
193-
char_raw <- charToRaw(paste(chars[-rm], collapse=""))
194-
195-
# replace character % with decoded parts
196-
char_raw[found] <- as.raw(as.hexmode(encoded))
197-
198-
return(rawToChar(char_raw))
199-
}
200-
return(url)
201-
}, character(1), USE.NAMES = FALSE)
202-
}
203-
204183
# Un-escape a string.
205184
# TODO: Complete.
206185
unescape <- function(string) {
207-
return(paws_url_decoder(string))
186+
return(curl_unescape(string))
208187
}
209188

210189
# The inverse of query_escape: convert the encoded string back to the original,

paws.common/tests/testthat/test_escape.R

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,23 @@ test_that("check all escape modes", {
1818
}
1919
})
2020

21+
test_that("check if non-ascci characters are correctly encoded", {
22+
string <- "なでçãкатынü"
23+
pattern <- paste0("[^", base_url_encode, "]")
24+
actual = paws_url_encoder(string, pattern)
25+
expect_equal(
26+
actual,
27+
"%E3%81%AA%E3%81%A7%C3%A7%C3%A3%D0%BA%D0%B0%D1%82%D1%8B%D0%BD%C3%BC"
28+
)
29+
})
30+
2131
test_that("check if encoded url is correctly decoded", {
2232
string <- "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~`!@#$%^&*()=+[{]}\\|;:'\",<>/? "
2333
pattern <- "[^ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._~-]"
2434

2535
url = paste(sample(strsplit(string, "")[[1]], 1e6, replace = T), collapse = "")
2636
url_encode = paws_url_encoder(url, pattern)
27-
actual = paws_url_decoder(url_encode)
37+
actual = unescape(url_encode)
2838

2939
expect_equal(actual, url)
3040
})
@@ -35,7 +45,7 @@ test_that("check if non-encoded url is correctly decoded", {
3545

3646
url = paste(sample(strsplit(string, "")[[1]], 1e6, replace = T), collapse = "")
3747
url_encode = paws_url_encoder(url, pattern)
38-
actual = paws_url_decoder(url_encode)
48+
actual = unescape(url_encode)
3949

4050
expect_equal(actual, url)
4151
})

0 commit comments

Comments
 (0)