How to Convert String to Rune in Golang

Created
Modified

UTF-8 Decoder

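The unicode/utf8 package provides a UTF-8 decoder, utf8.DecodeRuneInString, which returns the next rune in a string together with the number of bytes it occupies. We can use it like this: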

package main

import (
  "fmt"
  "unicode/utf8"
)

// main walks a UTF-8 string one rune at a time using the explicit decoder
// and prints each rune next to the byte offset at which it starts.
func main() {
  text := "Hello, 世界"

  offset := 0
  for offset < len(text) {
    // DecodeRuneInString returns the rune at the front of the remaining
    // text together with its encoded width in bytes.
    char, width := utf8.DecodeRuneInString(text[offset:])
    fmt.Printf("%d\t%c\n", offset, char)

    // Step over the whole encoded rune, not just a single byte.
    offset += width
  }
}
$ go run main.go
0   H
1   e
2   l
3   l
4   o
5   ,
6  
7   世
10  界

Go’s Range Loop

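A range loop over a string decodes its UTF-8 encoding implicitly: each iteration yields the byte offset at which a rune starts and the rune itself.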

package main

import (
  "fmt"
)

// main ranges over a UTF-8 string and prints, for every rune, its starting
// byte offset, its quoted form, and its numeric code point.
func main() {
  // One line per rune: byte offset, quoted rune, decimal code point.
  const format = "%d\t%q\t%d\n"

  text := "Hello, 世界"

  // Ranging over a string yields runes, so the offset advances by the
  // encoded width of each rune rather than by one.
  for offset, char := range text {
    fmt.Printf(format, offset, char, char)
  }
}
$ go run main.go
0   'H' 72
1   'e' 101
2   'l' 108
3   'l' 108
4   'o' 111
5   ',' 44
6   ' ' 32
7   '世' 19990
10  '界' 30028

We could use a simple range loop to count the number of runes in a string, like this:

package main

import (
  "fmt"
  "unicode/utf8"
)

// main counts the runes in a UTF-8 string twice: once with a range loop and
// once with the standard library helper, printing both results.
func main() {
  text := "Hello, 世界"

  // A range loop over a string advances one rune per iteration, so the
  // number of iterations is the number of runes.
  var count int
  for range text {
    count++
  }
  fmt.Println("Count ::", count)

  // utf8.RuneCountInString(text) gives the same answer without a loop.
  fmt.Println("Count ::", utf8.RuneCountInString(text))
}
$ go run main.go
Count :: 9
Count :: 9

A []rune Conversion

A []rune conversion applied to a UTF-8-encoded string returns the sequence of Unicode code points that the string encodes:

package main

import (
  "fmt"
)

// main converts a UTF-8 string to its sequence of code points with a []rune
// conversion, converts it back, and builds a string from a single code point.
func main() {

  // "hello" in Japanese hiragana
  s := "こんにちは"
  fmt.Printf("% x\n", s) // "e3 81 93 e3 82 93 e3 81 ab e3 81 a1 e3 81 af"
  r := []rune(s)
  fmt.Printf("%x\n", r) // "[3053 3093 306b 3061 306f]"

  // Converting the []rune back to a string re-encodes it as UTF-8.
  fmt.Println(string(r)) // "こんにちは"

  // Convert through rune explicitly: a direct integer-to-string conversion
  // such as string(0x306f) is rejected by go vet because it is easily
  // mistaken for producing the digits of the number.
  fmt.Println(string(rune(0x306f))) // "は"
}
$ go run main.go
e3 81 93 e3 82 93 e3 81 ab e3 81 a1 e3 81 af
[3053 3093 306b 3061 306f]
こんにちは
は

Related Tags

#rune# #string#