(ns codescene.features.refactorings.core
  (:require
    [clojure.string :as str]
    [clojure.java.io :as io]
    [evolutionary-metrics.mining.file-patterns :as file-patterns]
    [hotspots-x-ray.recommendations.code-health-interpretations :as chi]
    [semantic-csv.core :as sc]
    [codescene.analysis.paths :as paths]
    [codescene.mining-tasks.code-health-improvements :refer [csv-header-ml-training-features]])
  (:import java.util.Base64
           java.io.FileNotFoundException))

(defn- decode
  "Decodes `base64-string` with the standard Base64 decoder and returns
  the resulting bytes interpreted as UTF-8 text."
  [base64-string]
  (-> (Base64/getDecoder)
      (.decode ^String base64-string)
      (String. "UTF-8")))

(defn- ->safe-name
  "Turns a human readable name into one that fits the refactoring-samples
  folder structure: lower-cased, commas removed and every space replaced
  by an underscore."
  [name]
  (let [lowered (str/lower-case name)
        comma-free (str/replace lowered #"[,]" "")]
    (str/replace comma-free #"[ ]" "_")))

(defn- fallback-to-js
  "Looks up the JavaScript refactoring sample for `issue-name` on the
  resource path. Returns whatever `io/resource` yields for that path."
  [issue-name]
  (-> "refactoring-samples/javascript/%s.diff"
      (format issue-name)
      io/resource))

(defn- diff-with-fallback-to-js
  "Looks up the refactoring sample for `issue-name` in the folder of the
  given `language`. When that resource is missing, the JavaScript sample
  for the same issue is looked up instead."
  [language issue-name]
  (or (io/resource (format "refactoring-samples/%s/%s.diff" language issue-name))
      (fallback-to-js issue-name)))

(defn handcrafted-fallbacks
  "Assumes that there are refactoring samples in the resource path.

  Takes a map with `:language` and `:improvement-type`. Returns a vector
  holding a single map with the sample's `:diff` text, the lower-cased
  `:language` and the given `:improvement-type`, or nil when no sample
  resource is found."
  [{:keys [language improvement-type]}]
  (let [sample-name (->safe-name improvement-type)
        lower-cased (str/lower-case language)]
    (when-let [sample (diff-with-fallback-to-js lower-cased sample-name)]
      [{:diff (slurp sample)
        :language lower-cased
        :improvement-type improvement-type}])))

(comment
  ;; REPL example: look up the hand-crafted sample for a JavaScript
  ;; "Complex method" issue. Yields nil unless a matching sample is
  ;; available on the resource path.
  (handcrafted-fallbacks {:language "Javascript" :improvement-type "Complex method"}))

(defn get-unclassified-improvements
  "Reads the unclassified mined code improvements stored under the given
  analysis directory and returns them in raw form (a sequence of maps
  based on the csv).

  Returns nil when the file does not exist, which is the case if code
  improvement mining is not turned on, and also when the file is empty."
  [results-absolute-path]
  (let [csv-path (str results-absolute-path
                      "/"
                      paths/unlabeled-code-improvements-csv)]
    (try
      (seq (sc/slurp-csv csv-path))
      (catch FileNotFoundException _
        nil))))

(defn- rename-improvement-type
  "Maps the :improvement-type of an improvement to the name used in the
  Virtual Code Review. Types without a known alias are left as they are."
  ;; TODO: ideally there shouldn't be a discrepancy.
  [improvement]
  (let [aliases {"Bumpy Road Ahead" "Bumpy Road"}]
    ;; Look the type up in the alias table, defaulting to the type itself.
    (update improvement :improvement-type #(get aliases % %))))

(defn- add-language
  "Associates :language with the improvement, derived from its :entity
  via `chi/filename->language`."
  [improvement]
  (->> improvement
       :entity
       chi/filename->language
       (assoc improvement :language)))

(defn- decode-git-diff
  "Replaces the Base64-encoded :diff of the improvement with its decoded
  text."
  [improvement]
  (let [encoded-diff (:diff improvement)]
    (assoc improvement :diff (decode encoded-diff))))

(defn- decode-commit-message
  "Decodes the Base64-encoded :commit-title and :commit-full-message of
  the improvement, in that order."
  [improvement]
  (reduce (fn [decoded-so-far field]
            (update decoded-so-far field decode))
          improvement
          [:commit-title :commit-full-message]))

(defn- process-improvements
  "Prepares raw improvements for further use. Every improvement is run
  through the steps below, in order: type renaming, diff decoding,
  commit message decoding, language detection, tagging with `project-id`
  and finally attaching :training-data (the subset of keys listed in
  `csv-header-ml-training-features`).

  Returns a lazy sequence."
  [project-id improvements]
  (let [steps [rename-improvement-type
               decode-git-diff
               decode-commit-message
               add-language
               (fn [improvement]
                 (assoc improvement :project-id project-id))
               (fn [improvement]
                 (assoc improvement
                        :training-data
                        (select-keys improvement csv-header-ml-training-features)))]]
    ;; Wrap the collection in one lazy `map` per step, preserving step order.
    (reduce (fn [pending step] (map step pending))
            improvements
            steps)))

(defn get-code-improvements-from-analysis-result
  "Loads the code improvements found during mining from the analysis
  result at `results-absolute-path` and processes them for the project
  identified by `project-id`."
  [project-id results-absolute-path]
  (let [raw-improvements (get-unclassified-improvements results-absolute-path)]
    (process-improvements project-id raw-improvements)))

(defn improvement->recheck-info
  "Builds the input format accepted by the code mining code from a code
  improvement. Handy for rechecking old improvements.

  The improvement's :filename is split with `file-patterns/de-scope`
  into a repo and a relative file name. The result holds the :repo plus
  a :commit and a :parent entry, each pairing a revision (:current-rev
  and :previous-rev respectively) with that relative file name."
  [improvement]
  (let [scoped-name (:filename improvement)
        [repo relative-file-name] (file-patterns/de-scope scoped-name)
        at-revision (fn [rev-key]
                      {:rev (get improvement rev-key)
                       :name relative-file-name})]
    {:commit (at-revision :current-rev)
     :parent (at-revision :previous-rev)
     :repo repo}))

