Geen omschrijving

cidrreport.go 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. package internet
  2. import (
  3. "fmt"
  4. "io"
  5. "io/ioutil"
  6. "net/http"
  7. "os"
  8. "path/filepath"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/garyburd/redigo/redis"
  13. "golang.org/x/net/html"
  14. )
  15. // ASDescription contains the parsed result of an row inside a autnums.html file.
  16. type ASDescription struct {
  17. ASN int // AS Number
  18. Description string // AS Description
  19. CountryCode string // Country code (split from the as description field)
  20. }
  21. // CIDRReport encapuslates downloading and importing of autnums.html files.
  22. type CIDRReport struct {
  23. Date time.Time // Timestamp
  24. }
  25. // Path returns the absolute path to the target archive dump download file.
  26. func (b *CIDRReport) Path() string {
  27. return filepath.Join(
  28. b.dir(), fmt.Sprintf("cidr-report-%s.txt", b.Date.Format("20060102")))
  29. }
  30. func (b *CIDRReport) dir() string {
  31. return filepath.Join(
  32. dataDir, "cache", b.Date.Format("200601"))
  33. }
  34. func (b *CIDRReport) day() string {
  35. return b.Date.Format("20060102")
  36. }
  37. // IsDownloaded returns true if the CIDRReport archive is downloaded locally.
  38. func (b *CIDRReport) IsDownloaded() bool {
  39. p := b.Path()
  40. if _, err := os.Stat(p); err == nil {
  41. return true
  42. }
  43. return false
  44. }
  45. // Import stores the contents of a downloaded BGP dump into a redis server.
  46. // -1 is returned if the dump is alredy imported into redis.
  47. func (b *CIDRReport) Import(conn redis.Conn) (int, error) {
  48. alreadyImported, err := redis.Bool(conn.Do("SISMEMBER", "asd:imported_dates", b.day()))
  49. if err != nil {
  50. return 0, err
  51. }
  52. if alreadyImported {
  53. return -1, nil
  54. }
  55. file, err := os.Open(b.Path())
  56. if err != nil {
  57. return 0, err
  58. }
  59. n := 0
  60. day := b.day()
  61. err = parseReport(file, func(asd *ASDescription) error {
  62. conn.Send("HSET", fmt.Sprintf("asd:%d", asd.ASN), day,
  63. fmt.Sprintf("%s, %s", asd.Description, asd.CountryCode))
  64. n++
  65. if n%10000 == 0 {
  66. err := conn.Flush()
  67. if err != nil {
  68. return err
  69. }
  70. }
  71. return nil
  72. })
  73. conn.Send("SADD", "asd:imported_dates", day)
  74. err = conn.Flush()
  75. if err != nil {
  76. return 0, err
  77. }
  78. return n, nil
  79. }
  80. // Download fetches http://www.cidr-report.org/as2.0/autnums.html and stores it
  81. // in the cache. Currently one per day is fetched. Download returns early with
  82. // no error if the file already is downloaded to disk.
  83. func (b *CIDRReport) Download() error {
  84. dumpDir := b.dir()
  85. err := os.MkdirAll(dumpDir, 0777)
  86. if err != nil {
  87. return err
  88. }
  89. if b.IsDownloaded() {
  90. return nil
  91. }
  92. err = os.MkdirAll(filepath.Join(dataDir, "spool"), 0777)
  93. if err != nil {
  94. return err
  95. }
  96. tempFile, err := ioutil.TempFile(
  97. filepath.Join(dataDir, "spool"), b.day())
  98. if err != nil {
  99. return err
  100. }
  101. defer tempFile.Close()
  102. dlURL := "http://www.cidr-report.org/as2.0/autnums.html"
  103. resp, err := http.Get(dlURL)
  104. if err != nil {
  105. return err
  106. }
  107. if resp.StatusCode != http.StatusOK {
  108. return fmt.Errorf("Got http status code %s response for %s", resp.Status, dlURL)
  109. }
  110. // log.Printf("Downloading %s\n", dlURL)
  111. defer resp.Body.Close()
  112. _, err = io.Copy(tempFile, resp.Body)
  113. if err != nil {
  114. return err
  115. }
  116. err = os.Rename(tempFile.Name(), b.Path())
  117. if err != nil {
  118. return err
  119. }
  120. return nil
  121. }
  122. // RefreshCIDRReport ensures that the latest dump available is the one which is installed.
  123. func RefreshCIDRReport(conn redis.Conn) (int, error) {
  124. for _, b := range []CIDRReport{
  125. {Date: time.Now()},
  126. {Date: time.Now().Add(-time.Duration(time.Hour * 24))},
  127. } {
  128. err := b.Download()
  129. if err != nil {
  130. return 0, err
  131. }
  132. if b.IsDownloaded() {
  133. return b.Import(conn)
  134. }
  135. }
  136. return 0, nil
  137. }
  138. func parseReport(r io.Reader, emitter func(*ASDescription) error) error {
  139. z := html.NewTokenizer(r)
  140. n := 0
  141. depth := 0
  142. var asn *int
  143. loop:
  144. for {
  145. tt := z.Next()
  146. switch tt {
  147. case html.ErrorToken:
  148. break loop
  149. case html.TextToken:
  150. if asn != nil {
  151. desc := strings.TrimSpace(string(z.Text()))
  152. ccpos := strings.LastIndex(desc, ",")
  153. if ccpos == -1 {
  154. return ParseError{
  155. Message: "Could not parse country code",
  156. Path: "cidrreport",
  157. LineNum: n,
  158. Line: fmt.Sprintf("asn:%d desc:%s", asn, desc),
  159. }
  160. }
  161. emitter(&ASDescription{
  162. ASN: *asn,
  163. Description: strings.TrimSpace(desc[:ccpos]),
  164. CountryCode: strings.TrimSpace(desc[ccpos+1:]),
  165. })
  166. n++
  167. asn = nil
  168. } else if depth > 0 {
  169. asnstr := strings.TrimPrefix(string(z.Text()), "AS")
  170. var err error
  171. i, err := strconv.Atoi(strings.TrimSpace(asnstr))
  172. if err != nil {
  173. return ParseError{
  174. Message: err.Error(),
  175. Path: "cidrreport",
  176. LineNum: n,
  177. }
  178. }
  179. asn = &i
  180. }
  181. case html.StartTagToken, html.EndTagToken:
  182. tn, _ := z.TagName()
  183. if len(tn) == 1 && tn[0] == 'a' {
  184. if tt == html.StartTagToken {
  185. depth++
  186. } else {
  187. depth--
  188. }
  189. }
  190. }
  191. }
  192. if n == 0 {
  193. return ParseError{
  194. Message: "no entries found",
  195. Path: "cidrreport",
  196. LineNum: n,
  197. }
  198. }
  199. return nil
  200. }