(ns codescene.features.code-coverage.validation-messaging
  (:require
   [clojure.math :as math]
   [clojure.string :as str])
  (:import (java.time OffsetDateTime Instant)
           (java.time.format DateTimeFormatter)
           (java.time.temporal ChronoUnit)))

(defn- time-arg-dispatch
  "Dispatch function for `as-instant`.

  Returns :date-string for a string of the exact form yyyy-MM-dd,
  :time-zone-string for any other string, :joda for an
  org.joda.time.DateTime, and nil for anything else."
  [x]
  (cond
    (string? x) (if (re-matches #"\d{4}-\d{2}-\d{2}" x)
                  :date-string
                  :time-zone-string)
    (instance? org.joda.time.DateTime x) :joda
    :else nil))

;; Converts a supported time representation into a java.time.Instant.
;; The representation is picked by `time-arg-dispatch`.
(defmulti as-instant time-arg-dispatch)

;; A bare yyyy-MM-dd date is read as midnight at offset +00:00.
(defmethod as-instant :date-string [time-str]
  (let [midnight-utc (str time-str "T00:00:00+00:00")]
    (.toInstant (OffsetDateTime/parse midnight-utc DateTimeFormatter/ISO_OFFSET_DATE_TIME))))

;; Any other string must parse as an ISO-8601 date-time with an offset.
(defmethod as-instant :time-zone-string [time-str]
  (-> time-str
      (OffsetDateTime/parse DateTimeFormatter/ISO_OFFSET_DATE_TIME)
      (.toInstant)))

;; Joda values are converted through their epoch milliseconds.
(defmethod as-instant :joda [joda-time]
  (-> joda-time
      (.getMillis)
      (Instant/ofEpochMilli)))
;(ns-unmap 'codescene.features.code-coverage.validation-messaging 'as-instant)

;; Public entry point for time parsing: delegates to the `as-instant`
;; multimethod, so `time` may be any value `time-arg-dispatch` recognizes
;; (a yyyy-MM-dd string, an ISO offset date-time string, or a Joda DateTime).
(defn parse-git-time
  [time]
  (as-instant time))

(comment
  ;; REPL check: a string with an explicit offset takes the
  ;; :time-zone-string path; the resulting Instant is 2025-06-10T22:00:00Z.
  (parse-git-time "2025-06-11T00:00:00+02:00"))

(defn- validation-changed-files-scale
  "Maps the number of changed files onto a logarithmic score:
  1 + log10(file-count / 2), never lower than 1.

  Zero files returns exactly 1 (and avoids log10 of 0). The log argument
  is clamped to at least 1 so that a single changed file cannot score
  lower than no changed files at all, which keeps the scale monotonic."
  [file-count]
  (if (zero? file-count)
    1
    (inc (math/log10 (max 1 (* 0.5 file-count))))))

(defn- time-difference
  "Absolute number of whole hours between the analysis time and the
  time a data definition was created. Both arguments are parsed with
  `parse-git-time`."
  [analysis-time-str data-def-time-str]
  (let [uploaded-at (parse-git-time data-def-time-str)
        analysed-at (parse-git-time analysis-time-str)
        signed-hours (.between ChronoUnit/HOURS uploaded-at analysed-at)]
    (abs signed-hours)))

(defn- validation-temporal-scale
  "Maps an hour difference onto a logarithmic score:
  1 + log10(diff-hours / 4), never lower than 1.

  Zero hours returns exactly 1 (and avoids log10 of 0). The log argument
  is clamped to at least 1 so that being 1-3 hours behind cannot score
  lower than being exactly in sync, which keeps the scale monotonic."
  [diff-hours]
  (if (zero? diff-hours)
    1
    (inc (math/log10 (max 1 (/ diff-hours 4))))))

(defn- score-data-def
  "Scores one data definition against the analysis time.

  Returns a map with the raw hour difference (:time), the number of
  changed files (:files-changed), the two scaled scores (:time-score,
  :files-score) and their product (:combined-score)."
  [analysis-time-str {:keys [files-changed created-at]}]
  (let [hours-apart   (time-difference analysis-time-str created-at)
        changed-count (count files-changed)
        temporal      (validation-temporal-scale hours-apart)
        by-files      (validation-changed-files-scale changed-count)]
    {:time           hours-apart
     :files-changed  changed-count
     :time-score     temporal
     :files-score    by-files
     :combined-score (* temporal by-files)}))

(comment

  ;; REPL helper: for a fixed number of changed files, scores every sample
  ;; date against `ref-time` and renders each result as a one-entry map
  ;; keyed by :days-behind-N.
  (defn- row-at-files-changed
    [ref-time changed-count createds-at]
    (for [{:keys [date offset-days]} createds-at
          :let [{:keys [time-score files-score combined-score]}
                (score-data-def ref-time {:files-changed (repeat changed-count "blah") :created-at date})]]
      {(keyword (str "days-behind-" offset-days))
       (str "TIME: " time-score " FILES: " files-score " COMBO: " combined-score)}))

;"2025-06-11T00:00:00+02:00"
  ;; Upload timestamps paired with the number of days each one lies before
  ;; the 2025-06-30 reference time used in the `map` call below.
  (def sample-dates [{:date  "2025-06-29T00:00:00+00:00" :offset-days 1}
                     {:date "2025-06-28T00:00:00+00:00" :offset-days 2}
                     {:date "2025-06-27T00:00:00+00:00" :offset-days 3}
                     {:date "2025-06-25T00:00:00+00:00" :offset-days 5}
                     {:date "2025-06-20T00:00:00+00:00" :offset-days 10}])
  ;; Builds one {:changed-N rows} map per changed-file count, where rows
  ;; holds the scores for every sample date at that count.
  (map
   (fn [f-count]
     {(keyword (str "changed-" f-count)) (row-at-files-changed "2025-06-30T00:00:00+00:00" f-count sample-dates)})
   [0 10 50 100])
;; Chart showing the interplay between number of files and days behind.
;; 
;; => ({:changed-0
;;      ({:days-behind-1
;;        "TIME: 1.7781512503836436 FILES: 1 COMBO: 1.7781512503836436"}
;;       {:days-behind-2
;;        "TIME: 2.079181246047625 FILES: 1 COMBO: 2.079181246047625"}
;;       {:days-behind-3
;;        "TIME: 2.255272505103306 FILES: 1 COMBO: 2.255272505103306"}
;;       {:days-behind-5
;;        "TIME: 2.4771212547196626 FILES: 1 COMBO: 2.4771212547196626"}
;;       {:days-behind-10
;;        "TIME: 2.778151250383644 FILES: 1 COMBO: 2.778151250383644"})}
;;     {:changed-10
;;      ({:days-behind-1
;;        "TIME: 1.7781512503836436 FILES: 1.6989700043360187 COMBO: 3.0210256375743962"}
;;       {:days-behind-2
;;        "TIME: 2.079181246047625 FILES: 1.6989700043360187 COMBO: 3.5324665706129026"}
;;       {:days-behind-3
;;        "TIME: 2.255272505103306 FILES: 1.6989700043360187 COMBO: 3.831640337774268"}
;;       {:days-behind-5
;;        "TIME: 2.4771212547196626 FILES: 1.6989700043360187 COMBO: 4.208554708871909"}
;;       {:days-behind-10
;;        "TIME: 2.778151250383644 FILES: 1.6989700043360187 COMBO: 4.719995641910415"})}
;;     {:changed-50
;;      ({:days-behind-1
;;        "TIME: 1.7781512503836436 FILES: 2.3979400086720375 COMBO: 4.263900024765149"}
;;       {:days-behind-2
;;        "TIME: 2.079181246047625 FILES: 2.3979400086720375 COMBO: 4.98575189517818"}
;;       {:days-behind-3
;;        "TIME: 2.255272505103306 FILES: 2.3979400086720375 COMBO: 5.408008170445229"}
;;       {:days-behind-5
;;        "TIME: 2.4771212547196626 FILES: 2.3979400086720375 COMBO: 5.939988163024156"}
;;       {:days-behind-10
;;        "TIME: 2.778151250383644 FILES: 2.3979400086720375 COMBO: 6.661840033437187"})}
;;     {:changed-100
;;      ({:days-behind-1
;;        "TIME: 1.7781512503836436 FILES: 2.6989700043360187 COMBO: 4.79917688795804"}
;;       {:days-behind-2
;;        "TIME: 2.079181246047625 FILES: 2.6989700043360187 COMBO: 5.611647816660527"}
;;       {:days-behind-3
;;        "TIME: 2.255272505103306 FILES: 2.6989700043360187 COMBO: 6.086912842877574"}
;;       {:days-behind-5
;;        "TIME: 2.4771212547196626 FILES: 2.6989700043360187 COMBO: 6.685675963591572"}
;;       {:days-behind-10
;;        "TIME: 2.778151250383644 FILES: 2.6989700043360187 COMBO: 7.498146892294059"})})
)


(defn- avg-by-key
  "Arithmetic mean of the values found under `k` across all score maps.
  Returns 0 when there are no score maps."
  [data-divergence-scores k]
  (if-let [scores (seq data-divergence-scores)]
    (let [total (apply + (map k scores))]
      (/ total (count scores)))
    0))

(defn- max-by-key
  "Largest value found under `k` across all score maps.
  Returns 0 when there are no score maps."
  [data-divergence-scores k]
  (if-let [scores (seq data-divergence-scores)]
    (reduce max (map k scores))
    0))

(defn- by-severity
  "Counts how many scores fall into each severity bucket.

  A score below `low-medium` is :low, a score below `medium-high` is
  :medium, and anything else is :high. All three keys are always
  present in the result, with 0 for empty buckets."
  [scores low-medium medium-high]
  (let [bucket-of (fn [score]
                    (cond
                      (< score low-medium)  :low
                      (< score medium-high) :medium
                      :else                 :high))]
    (merge {:low 0 :medium 0 :high 0}
           (frequencies (map bucket-of scores)))))

;; Cut-offs applied to combined scores in this namespace: a score below
;; `significant-threshold` is treated as low/acceptable, a score below
;; `severe-threshold` as medium, and anything else as high/severe.
(def significant-threshold 3)
(def severe-threshold 6)

(defn- scores-summary
  "Aggregates per-upload score maps into one summary map: averages and
  maxima of the individual scores, the number of uploads, and a count of
  combined scores per severity bucket."
  [data-divergence-scores]
  (let [avg-of   (fn [k] (avg-by-key data-divergence-scores k))
        max-of   (fn [k] (max-by-key data-divergence-scores k))
        combined (map :combined-score data-divergence-scores)]
    {:average-combined    (avg-of :combined-score)
     :average-time-score  (avg-of :time-score)
     :average-files-score (avg-of :files-score)
     :max-combined        (max-of :combined-score)
     :max-time            (max-of :time)
     :max-time-score      (max-of :time-score)
     :max-files-changed   (max-of :files-changed)
     :max-files-score     (max-of :files-score)
     :data-def-count      (count data-divergence-scores)
     :combined-severity   (by-severity combined significant-threshold severe-threshold)}))

(defn weighted-result-validation-per-repo
  "Summarizes how far a repository's coverage uploads diverge from the
  analysed commit and attaches a :status to the summary.

  Arguments:
    repo             - repository identifier, stored under :repo.
    data-definitions - upload descriptions; each is scored with
                       `score-data-def` and compared on :commit-sha.
    commit-with-time - map with :commit and :commit-time of the analysis.

  :status is the first of these that applies:
    :no-defs      - there are no data definitions.
    :commit-match - every data definition has the analysed commit sha.
    :acceptable   - every combined score is below `significant-threshold`.
    :medium       - the average combined score is below `severe-threshold`.
    :severe       - the average combined score is at or above it.

  Throws ex-info if no clause applies."
  [repo data-definitions {:keys [commit commit-time]}]
  (let [data-divergence-scores (map (partial score-data-def commit-time) data-definitions)
        {:keys [average-combined] :as summary} (-> data-divergence-scores scores-summary (assoc :repo repo))]
    (cond
      (empty? data-definitions)
      (assoc summary :status :no-defs)

      (every? #(= commit (:commit-sha %)) data-definitions)
      (assoc summary :status :commit-match)

      (every? #(< (:combined-score %) significant-threshold) data-divergence-scores)
      (assoc summary :status :acceptable)

      (< average-combined severe-threshold)
      (assoc summary :status :medium)

      ;; Inclusive bound: an average exactly equal to the threshold is
      ;; severe, matching how `by-severity` buckets individual scores.
      ;; With strict comparisons on both sides that value matched neither
      ;; clause and ended up in the throwing :else branch.
      (>= average-combined severe-threshold)
      (assoc summary :status :severe)

      ;; Only reachable when the average is not comparable (e.g. NaN).
      :else
      (throw (ex-info "Missing weighted result validation clause." {:repo repo})))))

(defn- singular-plural
  "Returns `singular` when `num` equals 1, otherwise `plural`."
  [singular plural num]
  (if (not= 1 num)
    plural
    singular))

(defn presentation-by-severity
  "Builds a sentence describing how many of `data-def-count` uploads are
  significantly (`medium`) and severely (`high`) affected. A clause is
  left out when its count is not positive."
  [data-def-count medium high]
  (let [some-medium? (pos? medium)
        some-high?   (pos? high)]
    ;; `str` skips nil, so clauses that do not apply simply vanish.
    (str (format "Out of %d uploads, " data-def-count)
         (when some-medium?
           (format "%d %s significantly affected" medium (singular-plural "is" "are" medium)))
         (when (and some-medium? some-high?)
           " and ")
         (when some-high?
           (format "%d %s severely affected" high (singular-plural "is" "are" high)))
         ".")))

(defn number-of-uploads-affected
  "Sentence describing how many uploads are affected, taken from a
  summary map. Returns an empty string when there is exactly one upload,
  since a breakdown makes no sense then."
  [{:keys [data-def-count] {:keys [medium high]} :combined-severity}]
  (if (= 1 data-def-count)
    ""
    (if (= data-def-count high)
      (format "All %d uploads are severely affected." data-def-count)
      (presentation-by-severity data-def-count medium high))))

(defn- oldest-upload-text
  "Sentence stating how many whole days the most lagging upload is behind
  the analysis. Returns nil unless the highest combined score exceeds
  `significant-threshold` and the largest lag is more than 24 hours."
  [{:keys [max-time max-combined]}]
  (when (and (> max-combined significant-threshold) (> max-time 24))
    ;; max-time is above 24 here, so truncating division equals the floor.
    (let [days (long (quot max-time 24))]
      (format "At least one upload is %d %s behind the analysis."
              days
              (singular-plural "day" "days" days)))))

;; Opening sentence per problematic status. Each value is a `format`
;; template whose single %s receives the repository name.
(def ^:private status->display
  {:medium "A significant part of the code coverage data for the repository %s is out of sync with the analysis."
   :severe "The code coverage data for the repository %s is severely out of sync with the analysis."})

(defn- outliers-text
  "Describes outliers among the uploads of a summary.

  An upload counts as a time (or files) outlier when the maximum time
  (or files) score is more than twice the corresponding average. The
  sentence from `oldest-upload-text` is appended when it applies.
  Returns an empty string when there is nothing to report."
  [{:keys [max-time-score average-time-score max-files-score average-files-score] :as summary}]
  (let [time-outliers? (> max-time-score (* 2 average-time-score))
        files-outliers? (> max-files-score (* 2 average-files-score))]
    ;; The label has no trailing space: the parts are joined with a single
    ;; space, so a trailing one would produce a double space.
    (->> [(when (or time-outliers? files-outliers?) "Outliers:")
          (when time-outliers? "One or more coverage data uploads are lagging the others.")
          (when files-outliers? "One or more coverage data uploads have especially large numbers of files that have changed since coverage data was uploaded.")
          (oldest-upload-text summary)]
         (remove nil?)
         (str/join " "))))

(defn- present-problematic-statuses
  "Builds the :warn presentation map for a summary whose :status has an
  entry in `status->display`.

  :description is the opening sentence alone. :message is the opening
  followed by the outlier text and the affected-upload count, where
  blank parts are skipped so that no double or trailing spaces appear."
  [{:keys [repo status] :as summary}]
  (let [opening (format (get status->display status) repo)
        message (->> [opening
                      (outliers-text summary)
                      (number-of-uploads-affected summary)]
                     (remove str/blank?)
                     (str/join " "))]
    {:level :warn
     :description opening
     :message message
     :remedy "Ensure that your code coverage upload pipeline is working correctly."
     :data summary}))

(defn present-status
  "Turns a validation summary into a presentation map, chosen by :status.

  :no-defs, :commit-match and :acceptable give :info level maps; every
  other status is handed to `present-problematic-statuses`."
  [{:keys [repo status] :as summary}]
  (let [info (fn [description]
               {:level :info
                :description description
                :data summary})]
    (condp = status
      :no-defs
      (assoc (info (format "No code coverage data is available for the %s repository." repo))
             :message (format "No code coverage data is available for the %s repository. Generate and upload code coverage data to CodeScene." repo)
             :remedy "Begin uploading code coverage data.")

      :commit-match
      (info (format "All coverage data for repository %s matches the main analysis exactly." repo))

      :acceptable
      (info (format "Coverage data for repository %s mostly up to date." repo))

      (present-problematic-statuses summary))))

(defn new-validate-result
  "Validates the coverage data of one repository against the analysed
  commit and returns the presentation map for the resulting status."
  [repo data-definitions commit-with-time]
  (let [summary (weighted-result-validation-per-repo repo data-definitions commit-with-time)]
    (present-status summary)))
