feat: ignore seen links

This commit is contained in:
2025-11-27 13:03:58 +03:00
parent cc73978637
commit cab126c8de

View File

@@ -22,6 +22,7 @@ package linkvalidator
import (
"net/url"
"strings"
"sync"
"git.weirdcat.su/weirdcat/auto-attendance/internal/config"
"git.weirdcat.su/weirdcat/auto-attendance/internal/logger"
@@ -29,11 +30,15 @@ import (
// LinkValidator validates links and keeps track of the tokens it has already
// accepted, so that a link seen a second time can be ignored.
type LinkValidator interface {
// ValidateLink checks the given raw URL and returns a token for it.
// ok is false when the link is rejected, and also when its token has
// already been seen by this validator (in that case the token is still
// returned).
ValidateLink(string) (token string, ok bool)
// ResetSeenLinks discards the record of previously seen tokens.
ResetSeenLinks()
// GetSeenCount reports how many distinct tokens are currently recorded
// as seen.
GetSeenCount() int
}
// linkValidatorImpl is the LinkValidator implementation returned by
// NewLinkValidator.
type linkValidatorImpl struct {
// config is the configuration handed to NewLinkValidator.
config *config.Config
// log receives debug messages from the validator's methods.
log *logger.Logger
// seenMu guards seenLinks; it is read-locked for lookups and counting
// and write-locked for marking and resetting.
seenMu sync.RWMutex
// seenLinks is the set of already accepted tokens, keyed by token.
seenLinks map[string]bool
}
// ValidateLink implements LinkValidator.
@@ -76,13 +81,46 @@ func (v *linkValidatorImpl) ValidateLink(rawURL string) (token string, ok bool)
return "", false
}
v.log.Debug("URL validation successful", "url", rawURL)
// Check if we've already seen this token
v.seenMu.RLock()
alreadySeen := v.seenLinks[token]
v.seenMu.RUnlock()
if alreadySeen {
v.log.Debug("URL token already processed, skipping", "token", token, "url", rawURL)
return token, false
}
// Mark this token as seen
v.seenMu.Lock()
v.seenLinks[token] = true
v.seenMu.Unlock()
v.log.Debug("URL validation successful", "url", rawURL, "token", token)
return token, true
}
// ResetSeenLinks forgets every token recorded so far by swapping in a fresh,
// empty set. The number of entries that were dropped is reported at debug
// level.
func (v *linkValidatorImpl) ResetSeenLinks() {
	// Exclusive lock: the map itself is replaced, not just read.
	v.seenMu.Lock()
	defer v.seenMu.Unlock()

	dropped := len(v.seenLinks)
	v.seenLinks = map[string]bool{}

	v.log.Debug("Reset seen links cache", "previous_count", dropped)
}
// GetSeenCount reports how many distinct tokens are currently recorded as
// seen. The count is read under the shared (read) lock.
func (v *linkValidatorImpl) GetSeenCount() int {
	v.seenMu.RLock()
	total := len(v.seenLinks)
	v.seenMu.RUnlock()

	return total
}
// NewLinkValidator returns a LinkValidator that uses the given configuration
// and logger and starts with an empty set of seen tokens.
func NewLinkValidator(config *config.Config, log *logger.Logger) LinkValidator {
	validator := new(linkValidatorImpl)
	validator.config = config
	validator.log = log
	// Allocate the set up front so it can be written to without a nil check.
	validator.seenLinks = map[string]bool{}
	return validator
}