前言:
此时大家对“c语言字符串切片函数strsub”大致比较注重,小伙伴们都想要知道一些“c语言字符串切片函数strsub”的相关资讯。那么小编同时在网络上搜集了一些有关“c语言字符串切片函数strsub”的相关文章,希望各位老铁们能喜欢,朋友们快快来学习一下吧!
前言
昨天我们介绍 R 数据处理的时候,对字符串的操作都是用自带的函数。
虽然字符串处理并不是 R 的强项,看起来也不是那么的优雅,但是字符串在数据处理和清洗过程中还是扮演着较为重要的角色。
所以,今天我就讲下 R 字符串处理的第三方包 —— stringr
stringr 包提供了一组内聚函数,尽可能使字符串的操作简单化。
stringr 中主要包括四个函数族:
1. 字符操作:这些函数允许操作字符串向量中每个字符串的字符
2. 提供添加、删除和操作空白字符的工具
3. 提供对语境敏感的操作
4. 模式匹配函数。总共包含 4 种模式,其中最常用的就是正则表达式
安装
# install install.packages("stringr")# 从 GitHub 中安装最新的开发版本:# 需要用到 devtools 包,如果没有这个包,需要安装一下if (!require("devtools")) install.packages("devtools")devtools::install_github("tidyverse/stringr")使用导入
library(stringr)
stringr 中的所有函数都以 str_ 开头,并接受一个字符串向量作为第一个参数
1.单字符串操作str_length: 字符串长度
str_length(string)
参数:
string: 字符串或字符串向量
示例:
> s <- c("why", "video", "cross", "extra", "deal", "authority")> str_length(s)# [1] 3 5 5 5 4 9> str_length("love")# [1] 4> str_length(c("love", NA))# [1] 4 NAstr_sub: 提取和修改字符串
str_sub(string, start = 1L, end = -1L)
参数:
string: 字符串或字符串向量start: 开始位置end: 结束位置
示例:
x <- c("abcdef", "ghifjk")# The 3rd letter> str_sub(x, 3, 3)# [1] "c" "i"# The 2nd to 2nd-to-last character> str_sub(x, 2, -2)# [1] "bcde" "hifj"
修改字符串
> str_sub(x, 3, 3) <- "X"> x# [1] "abXdef" "ghXfjk"str_dup: 复制字符串
str_dup(string, times)
参数:
string: 字符串或字符串向量times: 复制数量
示例:
> str_dup(x, c(2, 3))# [1] "abcdefabcdef" "ghifjkghifjkghifjk"> str_dup(x, c(2))# [1] "abcdefabcdef" "ghifjkghifjk"> str_dup(x, 2)# [1] "abcdefabcdef" "ghifjkghifjk"str_c: 字符串拼接
str_c(..., sep = "", collapse = NULL)
参数:
...: 多参数的输入,可以是一个或多个字符串向量
sep: 拼接各个参数的对应元素时使用的连接符号。
collapse: 把结果向量中的所有元素合并为单个字符串时使用的连接符号;默认为 NULL,即不合并。
> x <- c("why", "video", "cross", "extra", "deal", "authority")> str_c(x, collapse = ", ")# [1] "why, video, cross, extra, deal, authority"> str_c("why", "not", "my", sep = "-")[1] "why-not-my"2.空白字符1.str_pad: 补充字符串的长度
str_pad(string, width, side = c("left", "right", "both"), pad = " ")
参数:
string: 字符串或字符串向量
width: 字符串填充后的长度
side: 填充方向(left: 左边填充;right: 右边填充;both: 两边都填充)
pad: 用于填充的字符
> x <- c("abc", "defghi")
> str_pad(x, 10) # 默认在左边
# [1] "       abc" "    defghi"
> str_pad(x, 10, "both")
# [1] "   abc    " "  defghi  "
> str_pad(x, 10, pad = "=")
# [1] "=======abc" "====defghi"
如果你设置的字符串填充长度小于其本身,那么不会产生任何作用
str_pad(x, 4)#> [1] " abc" "defghi"2.str_trunc: 省略过长字符
str_trunc(string, width, side = c("right", "left", "center"), ellipsis = "...")
参数:
string: 字符串或字符串向量width: 最大字符串长度side:省略方式center: 中间省略left: 左边省略right: 右边省略
示例
> x <- "This string is moderately long"> rbind(+ str_trunc(x, 20, "right"),+ str_trunc(x, 20, "left"),+ str_trunc(x, 20, "center")+ )# [,1] # [1,] "This string is mo..."# [2,] "...s moderately long"# [3,] "This stri...ely long"3. str_trim: 去掉字符串的空格和TAB
str_trim(string, side = c("both", "left", "right"))
参数:
string: 字符串或字符串向量side:过滤方式both: 两边都过滤left: 左边过滤right: 右边过滤
示例
x <- c("Short", "This is a long string")
x <- c(" a ", "b ", " c")str_trim(x)#> [1] "a" "b" "c"str_trim(x, "left")#> [1] "a " "b " "c"4. str_wrap: 控制字符串输出格式
str_wrap(string, width = 80, indent = 0, exdent = 0)
参数:
string: 字符串或字符串向量。width: 设置一行所占的宽度。indent: 段落首行的缩进值exdent: 段落非首行的缩进值
示例
jabberwocky <- str_c( "`Twas brillig, and the slithy toves ", "did gyre and gimble in the wabe: ", "All mimsy were the borogoves, ", "and the mome raths outgrabe. ")cat(str_wrap(jabberwocky, width = 40))# `Twas brillig, and the slithy toves did# gyre and gimble in the wabe: All mimsy# were the borogoves, and the mome raths# outgrabe.3.语境1.大小写转换
str_to_upper(string, locale = "")str_to_lower(string, locale = "")str_to_title(string, locale = "")
参数:
string: 字符串。locale: 按哪种语言习惯
示例
x <- "I like horses."str_to_upper(x)#> [1] "I LIKE HORSES."str_to_title(x)#> [1] "I Like Horses."str_to_lower(x)#> [1] "i like horses."# Turkish has two sorts of i: with and without the dotstr_to_lower(x, "tr")#> [1] "ı like horses."2. 排序
# 对值排序str_sort(x, decreasing = FALSE, na_last = TRUE, locale = "", ...)str_order(x, decreasing = FALSE, na_last = TRUE, locale = "", ...)
参数:
x: 字符串或字符串向量。decreasing: 排序方向。na_last: NA 放置的位置:TRUE 放到最后,FALSE 放到最前,NA 过滤处理locale: 按哪种语言习惯排序
> x <- c("y", "i", "k")> str_order(x)# [1] 2 3 1> str_sort(x)# [1] "i" "k" "y"# In Lithuanian, y comes between i and k> str_sort(x, locale = "lt")# [1] "i" "y" "k"3. 编码方式
str_conv(string, encoding)
参数:
string: 字符串或字符串向量。encoding: 编码名。
示例:
# 把中文字符字节化> x <- charToRaw('你好')> x[1] c4 e3 ba c3# 默认 win 系统字符集为 GBK,GB2312 为 GBK 字集,转码正常> str_conv(x, "GBK")[1] "你好"> str_conv(x, "GB2312")[1] "你好"# 在 mac 系统下> x <- charToRaw('你好')> str_conv(x, "GBK")[1] "浣犲ソ"> str_conv(x, "GB2312")# [1] "浣\032濂\032"# Warning messages:# 1: In stri_conv(string, encoding, "UTF-8") :# mac > str_conv(x, "UTF-8")# [1] "你好"4.模式匹配
每个模式匹配函数的前两个参数都相同
string:字符串或字符串向量pattern:匹配模式
字符串和匹配模式
strings <- c( "apple", "219 733 8965", "329-293-8753", "Work: 579-499-7527; Home: 543.355.3679")phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"1.匹配字符串的字符
str_detect(string, pattern)
示例:
# 匹配字符串的字符> str_detect(strings, phone)# [1] FALSE TRUE TRUE TRUE2. 模式在字符串中的位置
str_locate(string, pattern)str_locate_all(string, pattern)
示例:
> (loc <- str_locate(strings, phone))#> start end#> [1,] NA NA#> [2,] 1 12#> [3,] 1 12#> [4,] 7 18> str_locate_all(strings, phone)#> [[1]]#> start end#> #> [[2]]#> start end#> [1,] 1 12#> #> [[3]]#> start end#> [1,] 1 12#> #> [[4]]#> start end#> [1,] 7 18#> [2,] 27 383. 从字符串中提取匹配模式
str_extract(string, pattern)str_extract_all(string, pattern, simplify = FALSE)
示例:
> val <- c("a1", 467, "ab2")# 返回匹配的数字> str_extract(val, "\\d")# [1] "1" "4" "2"> str_extract_all(val, "\\d")# [[1]]# [1] "1"# [[2]]# [1] "4" "6" "7"# [[3]]# [1] "2"# 返回匹配的字符> str_extract(val, "[a-z]+")# [1] "a" NA "ab"> str_extract_all(val, "\\w+")# [[1]]# [1] "a1"# [[2]]# [1] "467"# [[3]]# [1] "ab2"4. 从字符串中提取匹配组
str_match(string, pattern)str_match_all(string, pattern)
示例
> str_match(strings, phone)#> [,1] [,2] [,3] [,4] #> [1,] NA NA NA NA #> [2,] "219 733 8965" "219" "733" "8965"#> [3,] "329-293-8753" "329" "293" "8753"#> [4,] "579-499-7527" "579" "499" "7527"> str_match_all(strings, phone)#> [[1]]#> [,1] [,2] [,3] [,4]#> #> [[2]]#> [,1] [,2] [,3] [,4] #> [1,] "219 733 8965" "219" "733" "8965"#> #> [[3]]#> [,1] [,2] [,3] [,4] #> [1,] "329-293-8753" "329" "293" "8753"#> #> [[4]]#> [,1] [,2] [,3] [,4] #> [1,] "579-499-7527" "579" "499" "7527"#> [2,] "543.355.3679" "543" "355" "3679"5. 替换匹配的字符串
str_replace(string, pattern, replacement)str_replace_all(string, pattern, replacement)str_replace_na(string, replacement = "NA")
replacement 为用于替换的字符串
示例:
# 替换第一个匹配> str_replace(strings, phone, "XXX-XXX-XXXX")#> [1] "apple" #> [2] "XXX-XXX-XXXX" #> [3] "XXX-XXX-XXXX" #> [4] "Work: XXX-XXX-XXXX; Home: 543.355.3679"# 替换所有匹配> str_replace_all(strings, phone, "XXX-XXX-XXXX")#> [1] "apple" #> [2] "XXX-XXX-XXXX" #> [3] "XXX-XXX-XXXX" #> [4] "Work: XXX-XXX-XXXX; Home: XXX-XXX-XXXX"> str_replace_na(c(NA, "abc", "def"))# [1] "NA" "abc" "def"6. 字符串分割
str_split(string, pattern, n = Inf) # 返回 liststr_split_fixed(string, pattern, n) # 返回 matrix
n 为返回的最大片段数(例如 n = 2 时最多只分割一次,得到两段)
示例:
str_split("a-b-c", "-")#> [[1]]#> [1] "a" "b" "c"str_split_fixed("a-b-c", "-", n = 2)#> [,1] [,2] #> [1,] "a" "b-c"7. 字符串计数
str_count(string, pattern = "")
示例:
> str_count(strings, phone)# [1] 0 1 1 28. 返回的匹配字符串
str_subset(string, pattern)
示例:
> str_subset(strings, phone)# [1] "219 733 8965" # [2] "329-293-8753" # [3] "Work: 579-499-7527; Home: 543.355.3679"9. 提取单词
word(string, start = 1L, end = start, sep = fixed(" "))string: 字符串,字符串向量。start: 开始位置。end: 结束位置。sep: 匹配字符。
示例
> val <- c("hello world", "this is my, girl")# 默认以空格分割,取第一个位置的字符串> word(val, 1)# [1] "hello" "this"> word(val, -1)# [1] "world" "girl" > word(val, 2, -1)# [1] "world" "is my, girl" # 以,分割,取第一个位置的字符串 > val <- '111,222,333,444'> word(val, 1, sep = fixed(','))[1] "111"> word(val, 3, sep = fixed(','))[1] "333"四种模式boundary: 匹配字符、行、句子或单词之间的边界
x <- "This is a sentence."str_split(x, boundary("word"))#> [[1]]#> [1] "This" "is" "a" "sentence"str_count(x, boundary("word"))#> [1] 4str_extract_all(x, boundary("word"))#> [[1]]#> [1] "This" "is" "a" "sentence"coll: 定义字符串标准排序规则。
i <- c("I", "İ", "i", "ı")i#> [1] "I" "İ" "i" "ı"str_subset(i, coll("i", ignore_case = TRUE))#> [1] "I" "i"str_subset(i, coll("i", ignore_case = TRUE, locale = "tr"))#> [1] "İ" "i"fixed: 定义用于匹配的字符,包括正则表达式中的转义符
> str_count(c("a.", ".", ".a.",NA), ".")[1] 2 1 3 NA# 用fixed匹配字符> str_count(c("a.", ".", ".a.",NA), fixed("."))[1] 1 1 2 NAregex: 定义正则表达式,默认就是正则表达式
> val[1] "a1" "467" "ab2"> str_extract(val, regex("\\w+"))[1] "a1" "467" "ab2"
标签: #c语言字符串切片函数strsub