Commit ab36419bbd9ebed0a75fb227a633d8c5bc14d3b8

Authored by James McMinn
1 parent d508d7c941
Exists in master

Large changes to Transform API. Removed redundant method from Inverted Index

Showing 4 changed files with 26 additions and 24 deletions Side-by-side Diff

index/invertedindex.go
... ... @@ -18,14 +18,14 @@ type Inverted struct {
18 18 NumTokens uint64
19 19 }
20 20  
21   -func (i *Inverted) Init() *Inverted {
  21 +func (i *Inverted) Reset() *Inverted {
22 22 i.index = make(map[string]*list.List)
23 23 i.count = make(map[string]uint64)
24 24 return i
25 25 }
26 26  
27 27 func NewInvertedIndex() *Inverted {
28   - return new(Inverted).Init()
  28 + return new(Inverted).Reset()
29 29 }
30 30  
31 31 func (i *Inverted) GetDocs(term string) []Doc {
... ... @@ -56,14 +56,6 @@ func (i *Inverted) IDF(n string) float64 {
56 56 return math.Log2(float64(i.NumDocuments+1) / float64(l+1))
57 57 }
58 58  
59   -func (i *Inverted) StringsToGrams(t map[string]uint) *map[string]uint {
60   - nt := make(map[string]uint)
61   - for k, v := range t {
62   - nt[k] = v
63   - }
64   - return &nt
65   -}
66   -
67 59 func (i *Inverted) AddDocument(d Doc) {
68 60 doc := d.TF()
69 61 for k, v := range doc {
transform/alphanumerictransform.go
... ... @@ -5,26 +5,31 @@ import (
5 5 "regexp"
6 6 )
7 7  
8   -var (
9   - reg *regexp.Regexp
10   -)
11   -
12 8 type AlphanumericTransform struct {
  9 + reg *regexp.Regexp
13 10 }
14 11  
15   -func NewAlphanumericTransform() *AlphanumericTransform {
  12 +// Create a new Alphanumeric Transformer, which removes any non-alphanumeric
  13 +// characters.
  14 +func NewAlphanumericTransform(extra string) *AlphanumericTransform {
16 15 var err error
17   - reg, err = regexp.Compile("[^A-Za-z0-9]+")
  16 + var reg *regexp.Regexp
  17 +
  18 + reg, err = regexp.Compile("[^A-Za-z0-9" + extra + "]+")
18 19 if err != nil {
19 20 log.Fatal(err)
20 21 }
21 22  
22   - return &AlphanumericTransform{}
  23 + return &AlphanumericTransform{reg: reg}
23 24 }
24 25  
25   -func (filter *AlphanumericTransform) Apply(input []string) (output []string) {
  26 +func (filter *AlphanumericTransform) ApplyAll(input []string) (output []string) {
26 27 for i := range input {
27   - output = append(output, reg.ReplaceAllString(input[i], ""))
  28 + output = append(output, filter.Apply(input[i]))
28 29 }
29 30 return output
30 31 }
  32 +
  33 +func (filter *AlphanumericTransform) Apply(input string) (output string) {
  34 + return filter.reg.ReplaceAllString(input, "")
  35 +}
transform/lowercasetransform.go
... ... @@ -13,9 +13,13 @@ func NewLowercaseTransform() *LowercaseTransform {
13 13 return &LowercaseTransform{}
14 14 }
15 15  
16   -func (filter *LowercaseTransform) Apply(input []string) (output []string) {
17   - for i := range input {
18   - output = append(output, strings.ToLower(input[i]))
  16 +func (filter *LowercaseTransform) ApplyAll(input []string) (output []string) {
  17 + for _, v := range input {
  18 + output = append(output, filter.Apply(v))
19 19 }
20 20 return output
21 21 }
  22 +
  23 +func (filter *LowercaseTransform) Apply(input string) (output string) {
  24 + return strings.ToLower(input)
  25 +}
transform/transform.go
1 1 package transform
2 2  
3   -type transform interface {
4   - Apply(input []string) (output []string)
  3 +type Transform interface {
  4 + ApplyAll(input []string) (output []string)
  5 + Apply(input string) (output string)
5 6 }