(ns codescene.features.analysis.util
  (:require [codescene.analysis.versioning :as versioning]
            [codescene.analysis.paths :as paths]
            [codescene.analysis.analysis-context-builder :as analysis-context-builder]
            [codescene.code-health.persisted-code-health-details :as persisted-code-health-details]
            [codescene.code-health.queries :as code-health]
            [codescene.util.json :as json]
            [codescene.features.util.number :as number-util]
            [codescene.note-watch.supervised-scores :as supervised-scores]
            [codescene.presentation.display :as display]
            [codescene.presentation.system-health :as system-health]
            [codescene.features.util.csv :as csv-util]
            [hotspots-x-ray.recommendations.code-health-interpretations :as chi]
            [codescene.analysis.visualization.parse-analysis-results :as parse]
            [codescene.features.analysis.item-filter :as item-filter]
            [clojure.core.memoize :as memo]
            [clojure.set :as set]
            [clojure.string :as string]
            [clojure.java.io :as io]
            [semantic-csv.core :as sc]
            [clj-time.core :as t]
            [clj-time.format :as f]
            [medley.core :as m]
            [slingshot.slingshot :refer [throw+]]))

;; Formatter for analysis timestamps: the built-in `:date-hour-minute` format
;; bound to the default time zone. The zone is looked up once, when this
;; namespace is loaded.
(def analysis-time-formatter (f/with-zone (f/formatters :date-hour-minute)
                               (t/default-time-zone)))

;; Lookup table pairing a numeric :indication level with the :severity label
;; that `get-recommendations-for-markers` attaches to a recommendation.
(def ^:const bio-marker-severity [{:indication 3 :severity "alert"}
                                  {:indication 2 :severity "warning"}
                                  {:indication 1 :severity "improvement"}])

;; Maps a biomarker score, given as a string "1".."5", to its letter category.
(def ^:private biomarker-score->category
  {"1" "A"
   "2" "B"
   "3" "C"
   "4" "D"
   "5" "E"})

;; Maps a biomarker letter category (or "-" for no category) to the
;; criticality status string used by `score-biomarker`.
(def ^:private biomarker-category->criticallity
  {"-" "bio-criticality-none"
   "A" "bio-criticality-low"
   "B" "bio-criticality-warning"
   "C" "bio-criticality-warning"
   "D" "bio-criticality-problem"
   "E" "bio-criticality-problem"})

;;; TODO: with the new biomarkers schema, this will only ever return "none"
(defn- score-biomarker
  "Translates the biomarker category `m` into its criticality status string.
  Categories missing from the lookup table yield \"bio-criticality-none\"."
  [m]
  (biomarker-category->criticallity m "bio-criticality-none"))

;;; TODO: with the new biomarkers schema, this will only ever indicate statuses of "bio-criticality-none"
(defn- score-biomarkers
  "Adds a `...status` entry next to each of the three biomarker values in
  `dashboard` (current, one month back, one year back)."
  [{:keys [biomarkercurrent biomarkermonthback biomarkeryearback] :as dashboard}]
  (assoc dashboard
         :biomarkercurrentstatus (score-biomarker biomarkercurrent)
         :biomarkermonthbackstatus (score-biomarker biomarkermonthback)
         :biomarkeryearbackstatus (score-biomarker biomarkeryearback)))

(defn- categorize-biomarker
  "Translates the biomarker score `m` into its letter category, or \"-\" when
  the score is not in the lookup table."
  [m]
  (biomarker-score->category m "-"))

(defn- analysis-version-contains-code-health?
  "Duplicated with the code in common -- we got a dependency cycle!

  True when the analysis version read from `dir` is 7 or higher."
  [dir]
  (<= 7 (versioning/get-analysis-version dir)))

(defn- presentable-biomarkers
  "Returns `dashboard` with its three biomarker values converted to letter
  categories, unless the analysis in `analysis-dir` already contains code
  health data, in which case `dashboard` is returned untouched."
  [dashboard analysis-dir]
  (if-not (analysis-version-contains-code-health? analysis-dir)
    (reduce (fn [acc biomarker-key]
              (update acc biomarker-key categorize-biomarker))
            dashboard
            [:biomarkercurrent :biomarkermonthback :biomarkeryearback])
    ;; already contains the correct data
    dashboard))

(defn dashboard-parameters-for
  "Reads the dashboard CSV of the analysis addressed by `path-fn` into a map
  with keyword keys, then makes the biomarker values presentable and adds
  their status entries."
  [path-fn]
  (let [dir (path-fn "")
        rows (csv-util/read-csv dir paths/dashboard-csv false)
        dashboard (update-keys (into {} rows) keyword)]
    (score-biomarkers (presentable-biomarkers dashboard dir))))

(defn- order-by-comparator
  "Returns the value `item` should be sorted on when ordering by `order_by`.

  `order_by` names a key of `item` (it is passed through `keyword`). A missing
  or nil value sorts as 0. For \"code_health\" the `:current_score` of the
  looked-up value is used, falling back to 0 when that is not a number (for
  example the \"-\" placeholder). A nil `order_by` yields 0 instead of throwing."
  [order_by item]
  ;; `get` rather than invoking `(keyword order_by)` as a function: `keyword`
  ;; returns nil for nil input, and calling nil throws a NullPointerException.
  (let [sort-value (or (get item (keyword order_by)) 0)]
    (if (= "code_health" order_by)
      (let [current-score (:current_score sort-value)]
        (if (number? current-score) current-score 0))
      sort-value)))

(defn paginated-ordered-data
  "Filters `data` with the `:filter` option, sorts the remainder descending on
  the `:order-by` field and returns the requested page as
  `{:page .. :max_pages .. :files [..]}`. Pages are numbered from 1."
  [data {:keys [page page-size order-by] filter-str :filter :as _options}]
  (let [matching (filter (item-filter/parse-filter filter-str) data)
        page-count (int (Math/ceil (double (/ (count matching) page-size))))
        offset (* (- page 1) page-size)
        sort-key (fn [item] (order-by-comparator order-by item))
        ;; we need to return a vector to avoid issue from compojure-api which return reverses LazySeqs
        ;;(see https://github.com/metosin/compojure-api/issues/412)
        page-items (vec (take page-size
                              (drop offset
                                    (sort-by sort-key > matching))))]
    {:page page :max_pages page-count :files page-items}))

(defn- numeric-code-health
  "Converts a truthy `v` with `display/->maybe-double`; a falsey `v` is
  returned as is."
  [v]
  (if v
    (display/->maybe-double v)
    v))

(defn- lookupable-code-health-scores-depending-on-analysis
  "Returns a lookup of code health scores for the analysis. When the full scan
  CSV exists it is used, keyed by the row's `name` with `:now`, `:last-month`
  and `:last-year` values. Otherwise we fall back on the older API and only
  present hotspots code health."
  [analysis-path-fn]
  (if-not (.exists (io/file (analysis-path-fn paths/full-scan-code-health-csv)))
    (code-health/lookupable-file-level-hotspots analysis-path-fn)
    (let [row->entry (fn [{:keys [name now month year]}]
                       [name {:now (numeric-code-health now)
                              :last-month (numeric-code-health month)
                              :last-year (numeric-code-health year)}])]
      (into {}
            (map row->entry)
            (sc/slurp-csv (analysis-path-fn paths/full-scan-code-health-csv))))))

(defn- process-social-individual-data
  "Reduces each entry of `data` to its author-level social metrics, joins the
  `:path` segments with \"/\" and renames the metric keys to their API names."
  [data]
  (let [renames {:ownership :main_author_contribution_percentage
                 :nauthors :number_of_authors
                 :fragmentation :author_contributions_fragmentation}]
    (for [{:keys [path] :as entry} data]
      (-> entry
          (select-keys [:ownership :path :nauthors :fragmentation])
          (assoc :path (string/join "/" path))
          (set/rename-keys renames)))))

(defn- process-social-team-data
  "Reduces each entry of `data` to its team-level social metrics, joins the
  `:path` segments with \"/\" and renames the metric keys to their API names."
  [data]
  (let [renames {:owner :primary_team
                 :ownership :team_ownership
                 :nauthors :number_of_teams
                 :fragmentation :teams_contributions_fragmentation}]
    (for [{:keys [path] :as entry} data]
      (-> entry
          (select-keys [:owner :ownership :path :nauthors :fragmentation])
          (assoc :path (string/join "/" path))
          (set/rename-keys renames)))))

(defn- flatten-tree
  "Recursively collects the leaves below `data` into one flat sequence. A node
  with a truthy `:children` value is replaced by its flattened children; a
  node without one is returned as is."
  [data]
  (let [children (:children data)]
    (if children
      (->> children
           (map flatten-tree)
           flatten)
      data)))

(defn- get-social-team-system-map
  "Reads the JSON `file` from `analysis-dir` and, when it has `:children`,
  flattens the tree and hands the result to `process-fn`. Returns nil
  otherwise."
  [analysis-dir process-fn file]
  (let [tree (json/read-when-exists analysis-dir file)]
    (when (:children tree)
      (process-fn (flatten-tree tree)))))

(defn- social-data-for
  "Returns the per-file author-level social data of the analysis, each entry
  merged with the first team-level entry found for the same `:path`."
  [analysis-dir]
  (let [individuals (get-social-team-system-map analysis-dir
                                                process-social-individual-data
                                                paths/social-system-map-file-name)
        teams (get-social-team-system-map analysis-dir
                                          process-social-team-data
                                          paths/social-team-system-map-file-name)
        path->teams (group-by :path teams)]
    (for [{:keys [path] :as individual} individuals]
      (merge individual (first (get path->teams path))))))

(defn- rename-map-keys
  "Renames the short metric keys of `map` to their descriptive API names.
  Keys that are not listed are left alone."
  [map]
  (let [short->api {:ndefects      :number_of_defects
                    :revs          :change_frequency
                    :ownership     :ownership_percentage
                    :defectdensity :defectdensity_percentage
                    :loss          :knowledge_loss_percentage
                    :size          :lines_of_code
                    :age           :last_modification_age_in_months}]
    (set/rename-keys map short->api)))

(defn get-recommendations-for-markers
  "Returns the recommendations for `markers` as maps holding only `:title` and
  `:severity`. The severity comes from the `bio-marker-severity` entry whose
  `:indication` equals the recommendation's. Markers whose `:calculated` is
  not exactly true yield no recommendations."
  [context markers]
  (let [severity-for (fn [indication]
                       (some #(when (= (:indication %) indication) %)
                             bio-marker-severity))
        interpretations (if (true? (:calculated markers))
                          (chi/interpret-for-a-human context markers)
                          [])]
    (for [item interpretations]
      (select-keys (merge item (severity-for (:indication item)))
                   [:title :severity]))))

;; Cached version of `persisted-code-health-details/code-health-details-for` to avoid rereading the same code health files
;; on consecutive api calls (when fetching pages of data with a script).
;; Backed by a core.memoize TTL cache with a threshold of 60000 (milliseconds per the core.memoize docs, i.e. one minute).
(def ^:private code-health-details-for (memo/ttl persisted-code-health-details/code-health-details-for :ttl/threshold 60000))

(defn- get-recommendations-for
  "Returns the recommendations for the file `name`, based on its (cached) code
  health details."
  [{:keys [analysis-path] :as context} name]
  ;; Note that here we read a separate details json file for every file in the analysis
  (->> (code-health-details-for analysis-path name)
       (get-recommendations-for-markers context)))

(defn- get-goals-for
  "Returns `{:category .. :text ..}` for every entry of `notes` whose `:name`
  contains `name` as a substring. The values come from the entry's `:note`."
  [_context notes name]
  (for [entry notes
        :when (string/includes? (:name entry) name)
        :let [{:keys [category note-text]} (:note entry)]]
    {:category category
     :text note-text}))

(defn- format-score
  "Rounds a positive numeric score for presentation; anything else becomes
  the \"-\" placeholder."
  [x]
  (cond
    (not (number? x)) "-"
    (pos? x) (number-util/round-code-health-scores x)
    ;; TODO: knorrest - why return a string representation for "no score"?
    :else "-"))

(defn- code-health-for-path
  "Looks up `path` in `lookupable-scores` and returns its formatted current,
  month-back and year-back code health scores."
  [lookupable-scores path]
  (let [scores (get lookupable-scores path)]
    {:year_score    (format-score (:last-year scores))
     :month_score   (format-score (:last-month scores))
     :current_score (format-score (:now scores))}))

(defn- process-file-data
  "Builds the API representation of every file in `data`.

  Each file gets its `:path` segments joined with \"/\" and is then combined
  with `social-team-system-map` through `clojure.set/join`. The joined entries
  are given defaults, renamed, limited to the exposed keys, numerically
  normalised and finally merged with `(path->additional-data path)`.

  NOTE(review): `set/join` returns only entries that match on both sides (and
  an empty set when either side is empty), so files without social data do not
  appear in the result -- confirm this is intended."
  [path->additional-data social-team-system-map data]
  (let [exposed-keys [:path :lines_of_code :last_modification_age_in_months :name :language
                      :ownership_percentage :owner :cost :knowledge_loss_percentage
                      :change_frequency :system_health :number_of_defects :team_ownership :primary_team
                      :number_of_teams :number_of_authors :author_contributions_fragmentation
                      :teams_contributions_fragmentation]
        floored-keys [:ownership_percentage :knowledge_loss_percentage :team_ownership
                      :author_contributions_fragmentation :teams_contributions_fragmentation]
        int-keys [:number_of_teams :number_of_authors]
        update-each (fn [acc ks f]
                      (reduce #(m/update-existing %1 %2 f) acc ks))
        with-joined-path (fn [entry]
                           (assoc entry :path (string/join "/" (:path entry))))
        joined (set/join (set (map with-joined-path data))
                         (set social-team-system-map))]
    (for [{:keys [path] :as item} joined]
      (-> (merge {:number_of_defects 0 :cost 0} item)
          rename-map-keys
          (select-keys exposed-keys)
          (update-each floored-keys number-util/floor-number-when-exists)
          (update-each int-keys display/->maybe-int)
          (merge (path->additional-data path))))))

(defn file-list
  "Returns the file list of `analysis`, each file enriched with its
  recommendations, goals and code health. Returns nil when the system map has
  no `:children`."
  [analysis]
  (let [analysis-dir (:dir analysis)
        path-fn (partial paths/make-analysis-path-to analysis-dir)
        context (-> (analysis-context-builder/build-context-for-existing-analysis path-fn)
                    (assoc :analysis-path analysis-dir))
        lookupable-scores (lookupable-code-health-scores-depending-on-analysis path-fn)
        social-team-system-map (social-data-for analysis-dir)
        files-data (json/read-when-exists analysis-dir paths/system-map-json)
        notes (map (comp supervised-scores/file-details-for-note :note :note-score)
                   (json/read-when-exists analysis-dir paths/risks-for-notes-json))
        additional-data-for (fn [path]
                              {:recommendations (get-recommendations-for context path)
                               :goals (get-goals-for context notes path)
                               :code_health (code-health-for-path lookupable-scores path)})]
    (when (:children files-data)
      (process-file-data additional-data-for
                         social-team-system-map
                         (flatten-tree files-data)))))

(defn system-health-for-component
  "Returns the system health entries of the first row named `component` in
  the system hotspots health CSV. Returns nil when `analysis-dir` is nil or
  no such row exists."
  [analysis-dir component]
  (when analysis-dir
    (let [rows (csv-util/read-csv analysis-dir paths/system-hotspots-health-csv)
          components (system-health/csv-rows->system-health rows "")]
      (some-> (m/find-first #(= component (:name %)) components)
              (select-keys [:current-score :year-score :system-mastery :month-score
                            :code-health-now-worst-performer :code-health-now-weighted-average])))))

(defn component-list-names-in-analysis
  "Returns a vector with the `:name` of every row in the system hotspots
  health CSV of `analysis-dir`."
  [analysis-dir]
  (into []
        (map :name)
        (csv-util/read-csv analysis-dir paths/system-hotspots-health-csv)))

(defn- hotspot-data
  "Returns `:name`, `:change_frequency` and `:lines_of_code` taken from the
  first architectural hotspot row whose first column equals `component-name`;
  an empty map when no row matches."
  [analysis-dir component-name]
  (let [hotspots-csv (io/file analysis-dir paths/architectural-level-hotspots-csv)
        spots (parse/hotspots [] parse/no-visualization-filter hotspots-csv)
        row (m/find-first #(= component-name (first %)) spots)]
    (zipmap [:name :change_frequency :lines_of_code] row)))

(defn- main-devs-data
  "Returns the main developer entry for `component-name`, with `:ownership`
  renamed to `:ownership_percentage` and floored when present."
  [analysis-dir component-name]
  (let [component->main-dev (parse/knowledge (io/file analysis-dir paths/architectural-level-main-dev-csv))
        renamed (set/rename-keys (get component->main-dev component-name)
                                 {:ownership :ownership_percentage})]
    (m/update-existing renamed :ownership_percentage number-util/floor-number-when-exists)))

(defn- knowledge-loss-data
  "Returns `{:knowledge_loss_percentage x}` for `component-name`, where x is
  the component's `:loss` floored with precision 2, or \"-\" when the loss is
  not a number."
  [analysis-dir component-name]
  (let [component->loss (parse/knowledge-loss (io/file analysis-dir paths/arch-knowledge-loss-csv))
        loss (get-in component->loss [component-name :loss])]
    {:knowledge_loss_percentage (if (number? loss)
                                  (number-util/floor-with-precision 2 loss)
                                  "-")}))

(defn- main-teams-data
  "Returns the team ownership entry for `component-name`, with `:owner`
  renamed to `:primary_team` and `:ownership` renamed to `:team_ownership`
  (floored when present)."
  [analysis-dir component-name]
  (let [component->team (parse/knowledge (io/file analysis-dir paths/architectural-level-team-ownership-csv))
        renamed (set/rename-keys (get component->team component-name)
                                 {:owner :primary_team :ownership :team_ownership})]
    (m/update-existing renamed :team_ownership number-util/floor-number-when-exists)))

(defn- fragmentation-data
  "Returns the fragmentation entry for `component-name`, with `:fractal`
  renamed to `:team_fragmentation_percentage` (floored when present) and
  `:authors` renamed to `:number_of_authors` (converted to an int when
  present)."
  [analysis-dir component-name]
  (let [component->fragmentation (parse/fragmentation (io/file analysis-dir paths/architectural-fragmentation-csv-file-name))
        renamed (set/rename-keys (get component->fragmentation component-name)
                                 {:fractal :team_fragmentation_percentage
                                  :authors :number_of_authors})
        floored (m/update-existing renamed :team_fragmentation_percentage number-util/floor-number-when-exists)]
    (m/update-existing floored :number_of_authors display/->maybe-int)))

(defn- system-health-data
  "Returns `{:system_health {..floored scores..} :system_mastery ..}` for
  `component-name`, or nil when the component has no system health data."
  [analysis-dir component-name]
  (let [score-keys [:current-score :year-score :month-score
                    :code-health-now-worst-performer :code-health-now-weighted-average]
        floor-score (fn [health score-key]
                      (m/update-existing health score-key number-util/floor-number-when-exists))
        health (reduce floor-score
                       (system-health-for-component analysis-dir component-name)
                       score-keys)]
    (when health
      {:system_health  (select-keys health score-keys)
       :system_mastery (:system-mastery health)})))

(defn get-component-by-name
  "Returns the combined architectural data for `component-name` (hotspot,
  ownership, code age, language, cost, knowledge loss, team, fragmentation and
  system health), or nil when the analysis has no component with that name."
  [analysis-dir component-name]
  (when (some #{component-name} (component-list-names-in-analysis analysis-dir))
    (let [component->age (parse/code-age (io/file analysis-dir paths/architectural-level-code-age-csv))
          component->language (parse/sprawl (io/file analysis-dir paths/architectural-technical-sprawl-csv))
          component->cost (parse/cost-hotspots {} (io/file analysis-dir paths/pm-cost-hotspots-by-logical-component-csv))]
      (merge (hotspot-data analysis-dir component-name)
             (main-devs-data analysis-dir component-name)
             ;; fall back to placeholders when the component is missing from a lookup
             {:last_modification_age_in_months (get component->age component-name "-")
              :language (get component->language component-name "-")
              :cost (get component->cost component-name 0)}
             (knowledge-loss-data analysis-dir component-name)
             (main-teams-data analysis-dir component-name)
             (fragmentation-data analysis-dir component-name)
             (system-health-data analysis-dir component-name)))))

(defn transformations-from
  "Reads the architectural transformations CSV located via `analysis-path-fn`."
  [analysis-path-fn]
  (sc/slurp-csv (analysis-path-fn paths/architectural-transformations-csv)))

(defn throw-invalid-analyses-request
  "Throws (via slingshot `throw+`) a map with `:type :invalid-analyses-request`
  and `msg` as its `:message`."
  [msg]
  (throw+ {:type :invalid-analyses-request :message msg}))

(defn validate-component
  "Throws an invalid-analyses-request error when the first argument carries a
  `:message`, or when `component` is not among the component names of the
  analysis addressed by `path-fn`. Returns nil otherwise."
  [{:keys [project-id path-fn message]} component]
  (if (some? message)
    (throw-invalid-analyses-request message)
    (let [known-components (set (component-list-names-in-analysis (path-fn "")))]
      (when-not (contains? known-components component)
        (throw-invalid-analyses-request
         (format "No component found with name: %s for project with ID %d" component project-id))))))

(defn- process-former-contributor
  "Returns `v` unchanged when it is a boolean. Otherwise returns true when `v`
  holds a value and false when it is nil."
  [v]
  (if-not (boolean? v)
    (some? v)
    v))

(defn author-statistics
  "Reads the developer impact JSON located via `path-fn` and returns one map
  per entry with the keys renamed to their API names. `:exdev` is normalised
  to a boolean and exposed as `:former_contributor`."
  [path-fn]
  (let [api-names {:deleted      :lines_of_code_removed
                   :added        :lines_of_code_added
                   :net          :lines_of_code_net
                   :months       :months_contributing
                   :lastcontrib  :last_contribution
                   :firstcontrib :first_contribution
                   :revisions    :commits
                   :exdev        :former_contributor}
        ->api-author (fn [author]
                       (set/rename-keys (update author :exdev process-former-contributor)
                                        api-names))]
    (map ->api-author
         (json/read-when-exists (path-fn paths/developer-impact-json)))))

(defn branch-statistics
  "Reads the branch statistics CSV located via `path-fn` and returns one map
  per row, limited to the exposed columns and renamed to their API names.
  Numeric columns are converted with `display/->maybe-int` and `:merged` with
  `parse-boolean`."
  [path-fn]
  (let [csv-columns [:name :repository :ttl :risk :commits :authors
                     :start :end :description :leadtimefirstcommit :leadmerge :merged]
        api-names {:name :branch
                   :ttl :branch_duration_in_hours
                   :risk :delivery_risk
                   :authors :contributing_authors
                   :start :first_commit_date
                   :end :last_commit_date
                   :description :risk_description
                   :leadmerge :lead_time_to_merge_in_hours
                   :leadtimefirstcommit :lead_time_first_commit_in_hours
                   :repository :repository_name}
        int-keys [:lead_time_first_commit_in_hours :delivery_risk :lead_time_to_merge_in_hours
                  :branch_duration_in_hours :commits :contributing_authors]
        ->ints (fn [branch]
                 (reduce #(m/update-existing %1 %2 display/->maybe-int) branch int-keys))
        ->api-branch (fn [row]
                       (-> row
                           (select-keys csv-columns)
                           (set/rename-keys api-names)
                           ->ints
                           (m/update-existing :merged parse-boolean)))]
    (map ->api-branch
         (csv-util/read-csv (path-fn paths/branch-statistics-csv)))))

(defn create-defect-ids-lookup
  "Create a map from file to defect-ticket-ids.

  The defect ids are the `:id` values of the defects CSV. For every `:entity`
  in the resolved log, its distinct `:ticket-id` values that are defect ids
  are kept. Files without any defect ticket are left out."
  [path-fn]
  (let [defect-ids (set (map :id (sc/slurp-csv (path-fn paths/pm-defects-csv))))
        file->commits (group-by :entity (sc/slurp-csv (path-fn paths/pm-resolved-log)))
        defect-tickets (fn [commits]
                         (->> commits
                              (map :ticket-id)
                              distinct
                              (filter defect-ids)))]
    (->> file->commits
         (m/map-vals defect-tickets)
         (m/filter-vals seq))))
