(ns codescene.features.analysis.util
  (:require [codescene.analysis.versioning :as versioning]
            [codescene.analysis.paths :as paths]
            [codescene.util.json :as json]
            [codescene.features.util.number :as number-util]
            [codescene.presentation.display :as display]
            [codescene.presentation.system-health :as system-health]
            [codescene.analysis.visualization.parse-analysis-results :as parse]
            [codescene.features.analysis.item-filter :as item-filter]
            [clojure.set :as set]
            [clojure.string :as string]
            [clojure.java.io :as io]
            [clj-time.format :as f]
            [medley.core :as m]
            [slingshot.slingshot :refer [throw+]]
            [codescene.features.util.api :as api-utils]
            [spec-tools.parse]
            [codescene.features.api.spec.analysis :as api-analysis-spec]
            [codescene.features.util.csv :as csv]
            [taoensso.timbre :as log]
            [clojure.string :as str]))

(def analysis-time-formatter
  "The clj-time formatter registered under `:date-time-no-ms`."
  (:date-time-no-ms f/formatters))

(def ^:private biomarker-score->category
  "Lookup from a biomarker score string (\"1\" to \"5\") to its letter
  category (\"A\" to \"E\")."
  (zipmap ["1" "2" "3" "4" "5"]
          ["A" "B" "C" "D" "E"]))

(def ^:private biomarker-category->criticallity
  "Lookup from a biomarker letter category (or \"-\") to its criticality string."
  (merge {"-" "bio-criticality-none"
          "A" "bio-criticality-low"}
         (zipmap ["B" "C"] (repeat "bio-criticality-warning"))
         (zipmap ["D" "E"] (repeat "bio-criticality-problem"))))

;;; TODO: with the new biomarkers schema, this will only ever return "none"
(defn- score-biomarker
  "Looks up the criticality string for biomarker category `m`, falling back
  to \"bio-criticality-none\" when `m` is not in the lookup table."
  [m]
  (biomarker-category->criticallity m "bio-criticality-none"))

;;; TODO: with the new biomarkers schema, this will only ever indicate statuses of "bio-criticality-none"
(defn- score-biomarkers
  "Adds a `...status` entry next to each of the three biomarker entries of
  `dashboard`, computed with `score-biomarker`."
  [{:keys [biomarkercurrent biomarkermonthback biomarkeryearback] :as dashboard}]
  (assoc dashboard
         :biomarkercurrentstatus (score-biomarker biomarkercurrent)
         :biomarkermonthbackstatus (score-biomarker biomarkermonthback)
         :biomarkeryearbackstatus (score-biomarker biomarkeryearback)))

(defn- categorize-biomarker
  "Translates biomarker score `m` into its letter category, or \"-\" when
  the score is not in the lookup table."
  [m]
  (biomarker-score->category m "-"))

(defn- analysis-version-contains-code-health?
  "True when the analysis version read for `dir` is 7 or higher.

  Duplicated with the code in common -- we got a dependency cycle!"
  [dir]
  (<= 7 (versioning/get-analysis-version dir)))

(defn- presentable-biomarkers
  "Returns `dashboard` with its three biomarker entries turned into letter
  categories. Analyses whose version already contains code health are
  returned untouched, since they already hold the correct data."
  [dashboard analysis-dir]
  (if-not (analysis-version-contains-code-health? analysis-dir)
    (reduce (fn [acc biomarker-key]
              (update acc biomarker-key categorize-biomarker))
            dashboard
            [:biomarkercurrent :biomarkermonthback :biomarkeryearback])
    dashboard))

(defn dashboard-parameters-for
  "Reads the dashboard CSV from the directory returned by `(path-fn \"\")`
  into a keyword-keyed map, then applies `presentable-biomarkers` and
  `score-biomarkers` to it."
  [path-fn]
  (let [dir (path-fn "")
        rows (csv/read-csv dir paths/dashboard-csv false)
        dashboard (update-keys (into {} rows) keyword)]
    (score-biomarkers (presentable-biomarkers dashboard dir))))

(defn- parse-order-by-path
  "Splits a dot-separated order-by string into a vector of keywords, e.g.
   'analysis.code_health.now' -> [:analysis :code_health :now].
   Returns nil when `order_by` is nil or blank."
  [order_by]
  (if (or (not order_by) (str/blank? order_by))
    nil
    (->> (str/split order_by #"\.")
         (mapv keyword))))

(defn- order-by-comparator
  "Returns the numeric sort key of `item` for the `order_by` field.
   A dotted `order_by` (e.g. 'analysis.code_health.now') is looked up as a
   nested path; anything else (e.g. 'lines_of_code') as a single key.
   A missing value sorts as negative infinity, a non-numeric value as 0."
  [order_by item]
  (let [segments (parse-order-by-path order_by)
        nested? (and segments (< 1 (count segments)))
        sort-value (if nested?
                     (get-in item segments)
                     (get item (keyword order_by)))]
    (if (number? sort-value)
      sort-value
      (if (nil? sort-value)
        Double/NEGATIVE_INFINITY
        0))))

(defn paginated-ordered-data
  "Filters `data` with the `:filter` option, sorts it descending by the
  `:order-by` field and returns one page of it.

  Returns a map with `:page`, `:max_pages` and the page items under the
  caller-supplied `results-k`. When `:fields` (a comma-separated string) is
  given, each item is narrowed to those keys."
  [data {:keys [page page-size order-by results-k] fields-str :fields filter-str :filter :as _options}]
  (let [keep-item? (item-filter/parse-filter filter-str)
        matching (filter keep-item? data)
        wanted-fields (for [field (str/split (or fields-str "") #",")
                            :when (not (str/blank? field))]
                        (keyword (str/trim field)))
        max-pages (int (Math/ceil (double (/ (count matching) page-size))))
        offset (* (- page 1) page-size)
        sorted (sort-by (fn [item] (order-by-comparator order-by item)) > matching)
        page-items (take page-size (drop offset sorted))
        projected (if (seq wanted-fields)
                    (map (fn [item] (select-keys item wanted-fields)) page-items)
                    page-items)]
    {:page page
     :max_pages max-pages
     ;; Returned as a vector because compojure-api reverses LazySeqs
     ;; (see https://github.com/metosin/compojure-api/issues/412)
     results-k (vec projected)}))

(defn system-health-for-component
  "Returns the system-health scores of the entry named `component` in the
  system hotspots health CSV under `analysis-dir`. Returns nil when
  `analysis-dir` is nil or no entry has that name."
  [analysis-dir component]
  (when analysis-dir
    (let [rows (csv/read-csv analysis-dir paths/system-hotspots-health-csv)
          entries (system-health/csv-rows->system-health rows "")
          named-component? (fn [entry] (= component (:name entry)))]
      (when-some [entry (m/find-first named-component? entries)]
        (select-keys entry [:current-score :year-score :system-mastery :month-score
                            :code-health-now-worst-performer :code-health-now-weighted-average])))))

(defn component-list-names-in-analysis
  "Returns a vector with the `:name` of every row in the system hotspots
  health CSV under `analysis-dir`."
  [analysis-dir]
  (->> (csv/read-csv analysis-dir paths/system-hotspots-health-csv)
       (into [] (map :name))))

(defn- hotspot-data
  "Returns the first architectural-level hotspot row whose first element is
  `component-name`, as a map keyed by `:name`, `:change_frequency` and
  `:lines_of_code`. Yields an empty map when no row matches."
  [analysis-dir component-name]
  (let [csv-file (io/file analysis-dir paths/architectural-level-hotspots-csv)
        rows (parse/hotspots [] parse/no-visualization-filter csv-file)
        component-row (first (filter (fn [row] (= component-name (first row))) rows))]
    (zipmap [:name :change_frequency :lines_of_code] component-row)))

(defn- main-devs-data
  "Returns the main-developer entry for `component-name`, with `:ownership`
  renamed to `:ownership_percentage` and, when present, passed through
  `number-util/floor-number-when-exists`."
  [analysis-dir component-name]
  (let [csv-file (io/file analysis-dir paths/architectural-level-main-dev-csv)
        devs-by-component (parse/knowledge csv-file)
        renamed (set/rename-keys (get devs-by-component component-name)
                                 {:ownership :ownership_percentage})]
    (m/update-existing renamed :ownership_percentage number-util/floor-number-when-exists)))

(defn- knowledge-loss-data
  "Returns `{:knowledge_loss_percentage v}` for `component-name`: the `:loss`
  value passed through `number-util/floor-with-precision` with 2 when it is
  a number, otherwise \"-\"."
  [analysis-dir component-name]
  (let [csv-file (io/file analysis-dir paths/arch-knowledge-loss-csv)
        loss-by-component (parse/knowledge-loss csv-file)
        loss (get-in loss-by-component [component-name :loss])]
    {:knowledge_loss_percentage (if (number? loss)
                                  (number-util/floor-with-precision 2 loss)
                                  "-")}))

(defn- main-teams-data
  "Returns the team-ownership entry for `component-name`, with `:owner`
  renamed to `:primary_team` and `:ownership` to `:team_ownership`. The
  latter, when present, is passed through
  `number-util/floor-number-when-exists`."
  [analysis-dir component-name]
  (let [csv-file (io/file analysis-dir paths/architectural-level-team-ownership-csv)
        teams-by-component (parse/knowledge csv-file)
        renamed (set/rename-keys (get teams-by-component component-name)
                                 {:owner     :primary_team
                                  :ownership :team_ownership})]
    (m/update-existing renamed :team_ownership number-util/floor-number-when-exists)))

(defn- fragmentation-data
  "Returns the fragmentation entry for `component-name`, with `:fractal`
  renamed to `:team_fragmentation_percentage` and `:authors` to
  `:number_of_authors`. Each renamed value, when present, is converted with
  `number-util/floor-number-when-exists` and `display/->maybe-int`
  respectively."
  [analysis-dir component-name]
  (let [csv-file (io/file analysis-dir paths/architectural-fragmentation-csv-file-name)
        fragmentation-by-component (parse/fragmentation csv-file)
        renamed (set/rename-keys (get fragmentation-by-component component-name)
                                 {:fractal :team_fragmentation_percentage
                                  :authors :number_of_authors})
        floored (m/update-existing renamed
                                   :team_fragmentation_percentage
                                   number-util/floor-number-when-exists)]
    (m/update-existing floored :number_of_authors display/->maybe-int)))

(defn- system-health-data
  "Returns `{:system_health scores :system_mastery mastery}` for
  `component-name`, with each present score passed through
  `number-util/floor-number-when-exists`. Returns nil when
  `system-health-for-component` finds nothing."
  [analysis-dir component-name]
  (let [score-keys [:current-score :year-score :month-score
                    :code-health-now-worst-performer :code-health-now-weighted-average]
        floor-score (fn [health score-key]
                      (m/update-existing health score-key number-util/floor-number-when-exists))
        health (reduce floor-score
                       (system-health-for-component analysis-dir component-name)
                       score-keys)]
    (when health
      {:system_health  (select-keys health score-keys)
       :system_mastery (:system-mastery health)})))

(defn- coverage-metric-summary
  "Builds the summary map for one coverage KPI. Only entries with a non-nil
  value are included; `:uncovered_lines` is total minus covered lines of
  code and is present only when both figures are."
  [{:keys [value hotspot-coverage-kpi] :as sub-kpi-data}]
  (let [details (get-in sub-kpi-data [:coverage-status :details])
        total (:total-loc details)
        covered (:covered-loc details)
        uncovered-lines (when (and total covered)
                          (- total covered))]
    (m/assoc-some {}
                  :overall_coverage value
                  :hotspot_coverage hotspot-coverage-kpi
                  :uncovered_files (:missing-data-for-files details)
                  :uncovered_lines uncovered-lines)))

(defn- code-coverage-summary*
  "Shapes a coverage `summary` into `{:code_coverage m}`.

  `m` holds the line-coverage figures at the top level plus a `:metrics`
  vector. The first element of `:metrics` is the same line-coverage data
  tagged with `:metric \"line-coverage\"`, followed by one entry per item in
  `(:sub-kpis summary)`, each tagged with that sub-kpi's `:name`."
  [summary]
  (let [;; Line coverage is stored at the root, not as a sub-kpi
        line-coverage (coverage-metric-summary summary)
        sub-kpi-metrics (map (fn [{metric-name :name :as sub-kpi}]
                               (assoc (coverage-metric-summary sub-kpi) :metric metric-name))
                             (:sub-kpis summary))]
    ;; Line coverage is the main kpi, but it is also listed as its own
    ;; entry in :metrics together with the other metrics.
    {:code_coverage (assoc line-coverage
                           :metrics (into [(assoc line-coverage :metric "line-coverage")]
                                          sub-kpi-metrics))}))

(defn code-coverage-summary
  "Reads the code coverage summary JSON and shapes it with
  `code-coverage-summary*`.

  With `path-fn` alone, summarizes the whole analysis. With `analysis-dir`
  and `component-name`, summarizes the entry of `:architectural-components`
  whose `:name` equals `component-name`. Returns nil when the JSON cannot be
  read or, in the component arity, when no such component exists."
  ([path-fn] ;; overall summary
   (let [summary (json/read-when-exists (path-fn paths/code-coverage-summary-json))]
     (when summary
       (code-coverage-summary* summary))))
  ([analysis-dir component-name] ;; summary for component
   (let [summary (json/read-when-exists (io/file analysis-dir paths/code-coverage-summary-json))]
     (when summary
       (when-some [components (:architectural-components summary)]
         (when-some [component (m/find-first (fn [c] (= component-name (:name c))) components)]
           (code-coverage-summary* component)))))))

(defn get-component-by-name
  "Collects everything known about the architectural component
  `component-name` in `analysis-dir` into one map. Returns nil when the
  name is not listed by `component-list-names-in-analysis`.

  Later sources win on key clashes, in the order they are merged below."
  [analysis-dir component-name]
  (when (some #{component-name} (component-list-names-in-analysis analysis-dir))
    (let [analysis-file (fn [relative-path] (io/file analysis-dir relative-path))
          code-age (parse/code-age (analysis-file paths/architectural-level-code-age-csv))
          sprawl (parse/sprawl (analysis-file paths/architectural-technical-sprawl-csv))
          cost-hotspots (parse/cost-hotspots {} (analysis-file paths/pm-cost-hotspots-by-logical-component-csv))
          ;; Single-value lookups, each with its own fallback when the component is absent.
          component-attributes {:last_modification_age_in_months (get code-age component-name "-")
                                :language (get sprawl component-name "-")
                                :cost (get cost-hotspots component-name 0)}]
      (apply merge
             [(hotspot-data analysis-dir component-name)
              (main-devs-data analysis-dir component-name)
              component-attributes
              (knowledge-loss-data analysis-dir component-name)
              (main-teams-data analysis-dir component-name)
              (fragmentation-data analysis-dir component-name)
              (system-health-data analysis-dir component-name)
              (code-coverage-summary analysis-dir component-name)]))))

(defn throw-invalid-analyses-request
  "Throws (via slingshot `throw+`) a map of type `:invalid-analyses-request`
  carrying `msg` under `:message`."
  [msg]
  (throw+ {:type    :invalid-analyses-request
           :message msg}))

(defn validate-component
  "Throws an invalid-analyses-request when the first argument carries a
  `:message`, or when `component` is not among the component names of the
  analysis at `(path-fn \"\")`. Returns nil otherwise."
  [{:keys [project-id path-fn message]} component]
  ;; A message present on the input is reported as-is, before any lookup.
  (when (some? message)
    (throw-invalid-analyses-request message))
  (when-not (contains? (set (component-list-names-in-analysis (path-fn ""))) component)
    (throw-invalid-analyses-request (format "No component found with name: %s for project with ID %d" component project-id))))

(defn- process-former-contributor
  "Coerces `v` to a boolean: a boolean is returned unchanged, nil becomes
  false, and any other value becomes true."
  [v]
  (if-not (boolean? v)
    (some? v)
    v))

;; Keys obtained by parsing the commit-object API spec with spec-tools.
(def commit-keys
  (-> ::api-analysis-spec/commit-object
      spec-tools.parse/parse-spec
      spec-tools.parse/get-keys))

(defn- has-ticket-id
  "Returns the `:ticket-id` of `commit` when it is non-empty, otherwise nil."
  [commit]
  (not-empty (:ticket-id commit)))

(defn- distinct-by
  "Returns one element of `coll` per distinct value of `(key element)`:
  the first element seen for each value."
  [key coll]
  (let [groups (group-by key coll)]
    (map first (vals groups))))

(defn- ->ticket-id-lookup
  "Builds a map from `:rev` to `:ticket-id` out of the rows of
  `parsed-evolution-content` that have a non-empty ticket id, using one row
  per revision."
  [parsed-evolution-content]
  (let [with-ticket (filter has-ticket-id parsed-evolution-content)
        one-per-rev (distinct-by :rev with-ticket)]
    (into {} (map (juxt :rev :ticket-id)) one-per-rev)))

(defn- present-commit
  "Shapes one revision row for the API: renames the added/deleted code
  columns, parses `:loc_added`, `:loc_deleted` and `:files` with
  `parse-long`, keeps only `commit-keys`, and attaches the `:ticket-id`
  found for the commit's `:rev` in `ticket-id-lookup`, if any."
  [ticket-id-lookup commit]
  (let [renamed (set/rename-keys commit {:addedcode   :loc_added
                                         :deletedcode :loc_deleted})
        parsed (reduce (fn [acc numeric-key] (update acc numeric-key parse-long))
                       renamed
                       [:loc_added :loc_deleted :files])
        ticket-id (get ticket-id-lookup (get commit :rev))]
    (m/assoc-some (select-keys parsed commit-keys) :ticket-id ticket-id)))

(defn commits
  "Returns a paginated list of commits read from the diffusion-by-revision
  CSV, each shaped by `present-commit` using ticket ids from the parsed
  evolution CSV. `page` defaults to 1 and `page-size` to 200.

  Throws an ex-info with `:kind :csv-file-not-found` when either CSV yields
  no content."
  [page page-size path-fn]
  (let [revision-csv-filename (path-fn paths/diffusion-by-revision-csv)
        revision-content (csv/read-csv-v2 revision-csv-filename)]
    (when (empty? revision-content)
      (throw (ex-info "commits-data-not-found" {:kind     :csv-file-not-found
                                                :filename revision-csv-filename})))
    (let [parsed-evolution-filename (path-fn paths/parsed-evolution-csv)
          parsed-evolution-content (csv/read-csv-v2 parsed-evolution-filename)]
      (when (empty? parsed-evolution-content)
        (throw (ex-info "issues-data-not-found" {:kind     :csv-file-not-found
                                                 :filename parsed-evolution-filename})))
      (let [ticket-id-lookup (->ticket-id-lookup parsed-evolution-content)
            pagination {:page          (or page 1)
                        :page-limit    (or page-size 200)
                        :list-kw       :commits
                        :processing-fn (partial present-commit ticket-id-lookup)}]
        (api-utils/paginated-list-result* revision-content pagination)))))

;; Keys obtained by parsing the issue-object API spec with spec-tools.
(def issue-keys
  (-> ::api-analysis-spec/issue-object
      spec-tools.parse/parse-spec
      spec-tools.parse/get-keys))

(defn- present-issue
  "Shapes one merged issue row for the API.

  Renames the source columns to their API names, keeps only `issue-keys`,
  drops entries whose value is nil or a blank string, and parses the numeric
  fields that are present with `parse-long`. `:cycle_time_hours` is the
  parsed source value divided by 60 (integer quotient).

  A numeric field that `parse-long` cannot parse ends up nil; this includes
  `:cycle_time_hours`, which would otherwise fail on the division."
  [issue]
  (let [draft (-> issue
                  (set/rename-keys {:ticket       :id
                                    :added        :loc_added
                                    :deleted      :loc_deleted
                                    :first-commit :first_commit
                                    :last-commit  :last_commit
                                    :closed       :closed_at
                                    :cycle-time   :cycle_time_hours})
                  (select-keys issue-keys))
        cleaned-up (into {}
                         (remove (fn [[_ v]]
                                   (or (nil? v)
                                       (and (string? v)
                                            (string/blank? v)))))
                         draft)]
    (-> cleaned-up
        (m/update-existing :loc_added parse-long)
        (m/update-existing :loc_deleted parse-long)
        (m/update-existing :files parse-long)
        (m/update-existing :authors parse-long)
        (m/update-existing :cycle_time_hours
                           (fn [minutes-str]
                             ;; parse-long yields nil for a non-integer string;
                             ;; only divide when there is a number to divide.
                             (some-> (parse-long minutes-str)
                                     (quot 60)))))))

(defn- present-file-changes
  "Shapes one file-change row: keeps the entity and the added/deleted line
  counts, renames them to `:file`, `:loc_added` and `:loc_deleted`, and
  parses the counts that are present with `parse-long`."
  [file-changes]
  (let [picked (select-keys file-changes [:entity :loc-deleted :loc-added])
        renamed (set/rename-keys picked {:entity      :file
                                         :loc-deleted :loc_deleted
                                         :loc-added   :loc_added})]
    (reduce (fn [acc count-key] (m/update-existing acc count-key parse-long))
            renamed
            [:loc_added :loc_deleted])))

(defn merge-churn-and-issue-data
  "Returns a vector with one map per item of `churn-data`: the churn item
  merged with the issue whose `:id` equals the item's `:ticket`, plus
  `:file_changes` when `file-changes-data` has rows for that ticket.
  File-change rows with a blank `:ticket-id` are ignored."
  [churn-data issue-data file-changes-data]
  (let [issues-by-id (m/index-by :id issue-data)
        has-ticket? (fn [change] (not (string/blank? (:ticket-id change))))
        changes-by-ticket (group-by :ticket-id (filter has-ticket? file-changes-data))
        merge-item (fn [{:keys [ticket] :as churn-item}]
                     (let [changes (when-some [rows (get changes-by-ticket ticket)]
                                     (mapv present-file-changes rows))]
                       (m/assoc-some (merge churn-item (get issues-by-id ticket))
                                     :file_changes changes)))]
    (into [] (map merge-item) churn-data)))

(defn sort-issues
  "Sorts `issues` in descending order of `:last_commit`, using `:id` as the
  sort key for an issue without one."
  [issues]
  (let [sort-key (fn [issue]
                   ;; :last_commit is not expected to be absent; :id is the fallback
                   (or (:last_commit issue) (:id issue)))
        descending (fn [a b] (compare b a))]
    (sort-by sort-key descending issues)))

(defn issues
  "Returns a paginated list of issues built from the churn-by-ticket CSV,
  the issues CSV and the resolved log, presented with `present-issue` and
  ordered with `sort-issues`. `page` defaults to 1 and `page-size` to 200.

  Throws an ex-info with `:kind :csv-file-not-found` when the churn or
  issues CSV yields no content. Missing file-change data only logs a
  warning."
  [page page-size path-fn]
  (let [churn-csv-filename (path-fn paths/abs-churn-by-ticket-id)
        churn-content (csv/read-csv-v2 churn-csv-filename)]
    (when (empty? churn-content)
      (throw (ex-info "churn-data-not-found" {:kind     :csv-file-not-found
                                              :filename churn-csv-filename})))
    (let [issues-filename (path-fn paths/pm-issues-csv)
          issues-content (csv/read-csv-v2 issues-filename)]
      (when (empty? issues-content)
        (throw (ex-info "issues-data-not-found" {:kind     :csv-file-not-found
                                                 :filename issues-filename})))
      (let [file-changes-filename (path-fn paths/pm-resolved-log)
            file-changes-content (csv/read-csv-v2 file-changes-filename)]
        (when (empty? file-changes-content)
          (log/warn "Files changes data not found" {:filename file-changes-filename}))
        (let [sorted-issues (->> (merge-churn-and-issue-data churn-content issues-content file-changes-content)
                                 (mapv present-issue)
                                 sort-issues)
              pagination {:page          (or page 1)
                          :page-limit    (or page-size 200)
                          :list-kw       :issues
                          :processing-fn identity}]
          (api-utils/paginated-list-result* sorted-issues pagination))))))

(defn author-statistics
  "Returns the entries of the developer impact JSON with their keys renamed
  to the API names. `:exdev` is first coerced to a boolean with
  `process-former-contributor` and then exposed as `:former_contributor`."
  [path-fn]
  (let [api-names {:deleted      :lines_of_code_removed
                   :added        :lines_of_code_added
                   :net          :lines_of_code_net
                   :months       :months_contributing
                   :lastcontrib  :last_contribution
                   :firstcontrib :first_contribution
                   :revisions    :commits
                   :exdev        :former_contributor}
        present-author (fn [author]
                         (set/rename-keys (update author :exdev process-former-contributor)
                                          api-names))
        authors (json/read-when-exists (path-fn paths/developer-impact-json))]
    (map present-author authors)))

(defn branch-statistics
  "Returns the rows of the branch statistics CSV shaped for the API: only
  the known columns are kept, they are renamed to their API names, the
  numeric ones are converted with `display/->maybe-int`, and `:merged`, when
  present, is parsed with `parse-boolean`."
  [path-fn]
  (let [source-keys [:name :repository :ttl :risk :commits :authors
                     :start :end :description :leadtimefirstcommit :leadmerge :merged]
        api-names {:name                :branch
                   :ttl                 :branch_duration_in_hours
                   :risk                :delivery_risk
                   :authors             :contributing_authors
                   :start               :first_commit_date
                   :end                 :last_commit_date
                   :description         :risk_description
                   :leadmerge           :lead_time_to_merge_in_hours
                   :leadtimefirstcommit :lead_time_first_commit_in_hours
                   :repository          :repository_name}
        int-keys [:lead_time_first_commit_in_hours :delivery_risk :lead_time_to_merge_in_hours
                  :branch_duration_in_hours :commits :contributing_authors]
        ints-for (fn [branch]
                   (reduce (fn [acc int-key] (m/update-existing acc int-key display/->maybe-int))
                           branch
                           int-keys))
        present-branch (fn [row]
                         (-> row
                             (select-keys source-keys)
                             (set/rename-keys api-names)
                             ints-for
                             (m/update-existing :merged parse-boolean)))
        rows (csv/read-csv (path-fn paths/branch-statistics-csv))]
    (map present-branch rows)))

(defn create-defect-ids-lookup
  "Builds a map from file (the `:entity` of a resolved-log row) to the
  distinct ticket ids of that file that appear as an `:id` in the defects
  CSV. Files without any defect ticket are left out."
  [path-fn]
  (let [defect-ids (set (map :id (csv/read-csv (path-fn paths/pm-defects-csv))))
        rows-by-file (group-by :entity (csv/read-csv (path-fn paths/pm-resolved-log)))
        defect-tickets (fn [rows]
                         (->> rows
                              (map :ticket-id)
                              distinct
                              (filter defect-ids)))]
    (m/filter-vals seq (m/map-vals defect-tickets rows-by-file))))
