(ns codescene.features.code-coverage.parsers.core
    "This module auto-deduces the coverage parser to use.
     
     Motivation: an enterprise codebase will have multiple programming languages which 
     means multiple coverage formats. To specify every single coverage file + its format on a 
     CLI invocation is painful. Instead, we check the coverage reports and automatically 
     deduce their format, allowing us to automatically create the correct parser."
  (:require [codescene.features.code-coverage.parser :as parser]
            [codescene.features.code-coverage.parsers.open-clover-parser :as open-clover-parser]
            [codescene.features.code-coverage.parsers.cobertura-parser :as cobertura-parser]
            [codescene.features.code-coverage.parsers.jacoco-parser :as jacoco-parser]
            [codescene.features.code-coverage.parsers.lcov-parser :as lcov-parser]
            [codescene.features.code-coverage.parsers.bullseye-parser :as bullseye-parser]
            [codescene.features.code-coverage.parsers.dotcover-parser :as dotcover-parser]
            [codescene.features.code-coverage.parsers.open-cover-parser :as open-cover-parser]
            [codescene.features.code-coverage.parsers.ncover-parser :as ncover-parser]
            [codescene.features.code-coverage.parsers.edn-parser :as edn-parser]

            [evolutionary-metrics.mining.file-patterns :as patterns]
            [medley.core :as m]
            [clojure.string :as s]
            [clojure.data.xml :as xml]
            [clojure.java.io :as io]))

(defn- jacoco?
  "Format detector for JaCoCo: true when the root tag of the XML report is :report."
  [top-level-content]
  (= :report (:root-tag top-level-content)))

(defn- cobertura?
  "Format detector for Cobertura: the root tag is :coverage and at least one
   of the root's children is tagged :packages."
  [{root :root-tag nodes :children}]
  (letfn [(packages-node? [node]
            (= :packages (:tag node)))]
    (and (= :coverage root)
         (some packages-node? nodes))))

(defn- open-clover?
  "Format detector for OpenClover: the root tag is :coverage and at least one
   of the root's children is tagged :project."
  [{root :root-tag nodes :children}]
  (letfn [(project-node? [node]
            (= :project (:tag node)))]
    (and (= :coverage root)
         (some project-node? nodes))))

(defn- bulls-eye?
  "Format detector for BullsEye: true when the root tag of the XML report is :BullseyeCoverage."
  [top-level-content]
  (= :BullseyeCoverage (:root-tag top-level-content)))

(defn- dot-cover?
  "Format detector for DotCover: the root tag is either :Root or :CoverageReport,
   and at least one of the root's children is tagged :Assembly."
  [{root :root-tag nodes :children}]
  (and (contains? #{:Root :CoverageReport} root)
       (some (fn [node] (= :Assembly (:tag node)))
             nodes)))

(defn- open-cover?
  "Format detector for OpenCover: the root tag is :CoverageSession and the root
   has both a :Modules child and a :Summary child."
  [{root :root-tag nodes :children}]
  (letfn [(has-child-tagged? [wanted-tag]
            (some (fn [node] (= wanted-tag (:tag node)))
                  nodes))]
    (and (= :CoverageSession root)
         (has-child-tagged? :Modules)
         (has-child-tagged? :Summary))))

(defn- ncover?
  "Format detector for NCover: the root tag is :coverage and at least one child
   is a :module element that in turn contains a :method element."
  [{root :root-tag nodes :children}]
  (letfn [(method-node? [node]
            (= :method (:tag node)))
          ; expecting module -> method*
          (module-with-methods? [node]
            (and (= :module (:tag node))
                 (some method-node? (:content node))))]
    (and (= :coverage root)
         (some module-with-methods? nodes))))

(defn- pure-text
  "Placeholder format detector for the pure text formats.

   We expect LCov and edn to be parsed separately, but we need this function as a
   placeholder to maintain the structure of the named-parsers table.

   Ignores its argument and always throws an IllegalStateException, since being
   called at all indicates an internal error."
  [_unused]
  ; IllegalStateException has no constructor taking two strings, so the message
  ; fragments have to be joined into a single string before constructing it.
  (throw (IllegalStateException.
          (str "Error parsing coverage report: this is a pure text format, not XML. "
               "There's nothing you can do except reporting this to CodeScene's support. (We've made an error)."))))

(def ^:private named-parsers
  "Table of the supported coverage formats, keyed by format name.

   Each entry holds the :parser instance for the format and the :format-detector
   predicate used to recognize it. An optional :user-facing-hint replaces the
   plain format name when the format is described to the user."
  (letfn [(parser-def
            ([parser format-detector]
             {:parser parser
              :format-detector format-detector})
            ([parser format-detector user-facing-hint]
             (assoc (parser-def parser format-detector)
                    :user-facing-hint user-facing-hint)))]
    {"JaCoCo"     (parser-def (jacoco-parser/->Parser) jacoco?)
     "Cobertura"  (parser-def (cobertura-parser/->Parser) cobertura?)
     "OpenClover" (parser-def (open-clover-parser/->Parser) open-clover?)
     "BullsEye"   (parser-def (bullseye-parser/->Parser)
                              bulls-eye?
                              "BullsEye (XML report, _not_ binary format)")
     "DotCover"   (parser-def (dotcover-parser/->Parser)
                              dot-cover?
                              "DotCover (XML report, _not_ .dcvr snapshots)")
     "OpenCover"  (parser-def (open-cover-parser/->Parser) open-cover?)
     ; The pure text formats are never sniffed as XML; their detector is a placeholder.
     "LCov"       (parser-def (lcov-parser/->Parser) pure-text)
     "edn"        (parser-def (edn-parser/->Parser) pure-text)
     "NCover"     (parser-def (ncover-parser/->Parser) ncover?)}))

(defn- parser-matching
  "Looks up the parser registered in named-parsers under the given format name.
   Returns nil when there is no entry for coverage-format."
  [coverage-format]
  (get-in named-parsers [coverage-format :parser]))

(defn parser-matching-given
  "Legacy lookup from the time before the coverage format could be auto-detected.
   => REMOVE this function once the code is migrated to auto-detect.

   Returns the first registered parser where the lower-cased format equals either
   the parser's id or its lower-cased name; nil when no parser matches."
  [format]
  (let [wanted (s/lower-case format)
        matches-wanted? (fn [candidate]
                          (or (= wanted (parser/id candidate))
                              (= wanted (s/lower-case (parser/name candidate)))))]
    (->> (vals named-parsers)
         (map :parser)
         (m/find-first matches-wanted?))))

(defn- describe-support-to-user
  "Describes one named-parsers entry for use in error messages.

   Some coverage tools support multiple formats, but we don't: we want text and XML.
   When the parser definition carries a :user-facing-hint, that hint is returned;
   otherwise the plain format name is."
  [[coverage-format parser-def]]
  (or (:user-facing-hint parser-def)
      coverage-format))

(def ^:private supported-formats
  "Comma-separated, sorted listing of the user-facing descriptions of every
   entry in named-parsers. Used when building error messages."
  (s/join ","
          (sort (map describe-support-to-user named-parsers))))

;; We dispatch on two properties of the coverage file:
;;
;; 1. File format: pure text or XML? 
;;    This lets us separate out the pure text formats (LCov and edn) from the XML based ones.
;;    The pure text parsers are created by dispatching on the file extension:

(defmulti make-text-based-parser-for
  "Creates the parser for a pure text coverage report.
   Dispatches on the extension of the given file path, as returned by
   patterns/extension-of."
  (fn [report-path]
    (patterns/extension-of report-path)))

(defmethod make-text-based-parser-for :default [file-path]
  ; No method is registered for this extension, so the format cannot be deduced:
  ; tell the user which formats are supported.
  (let [^String message (str "Cannot determine the coverage report format for " file-path
                             ". Supported coverage formats are: "
                             supported-formats
                             ". (If it is LCOV, then rename the report file to .info).")]
    (throw (IllegalArgumentException. message))))

(defmethod make-text-based-parser-for ".info"
  [_report-path]
  ; Reports with the .info extension are handled by the parser registered as "LCov".
  (parser-matching "LCov"))

(defmethod make-text-based-parser-for ".edn"
  [_report-path]
  ; Reports with the .edn extension are handled by the parser registered as "edn".
  (parser-matching "edn"))

;; This means that pure text is parsed separately from XML where we need to 
;; sniff at the data. So remove the pure text parsers (LCov and edn) from the set of XML candidates:
(def ^:private pure-text-parser?
  "Names of the formats in named-parsers that are pure text rather than XML.
   A set, so it can be called as a predicate on a format name."
  #{"edn" "LCov"})

;; 2. Now, if XML, then we also need to inspect the root element and dispatch on it.
;;    This lets us deduce the coverage format for XML based reports.

(def ^:private xml-content->parser-candidate
  "The XML parser candidates: one map per entry in named-parsers that is not a
   pure text format, pairing the entry's format detector (:supports-format?)
   with its parser (:parser)."
  (for [[format-name {:keys [format-detector parser]}] named-parsers
        :when (not (pure-text-parser? format-name))]
    {:supports-format? format-detector
     :parser parser}))

(defn- parser-supports-format?
  "Applies the candidate's :supports-format? detector to the top-level content
   of a parsed report and returns whatever the detector returns."
  [top-level-content candidate]
  (let [detector (:supports-format? candidate)]
    (detector top-level-content)))

(defn- make-xml-based-parser-for
  "Deduces the coverage format of the XML report at file-path and returns the
   matching parser.

   The report's root tag and the root's children are handed to each candidate in
   xml-content->parser-candidate; the parser of the first candidate whose detector
   accepts the content is returned.

   Throws IllegalArgumentException when no candidate recognizes the report."
  [file-path]
  (let [candidate
        ; The reader is closed once detection is done. Detection has to run inside
        ; with-open: the detectors walk the parsed tree, and its content may only be
        ; read from the underlying stream on demand.
        (with-open [report (io/reader file-path)]
          (let [parsed-xml (xml/parse report :namespace-aware false :support-dtd false)
                top-level-content {:root-tag (:tag parsed-xml)
                                   :children (:content parsed-xml)}]
            (->> xml-content->parser-candidate
                 (m/find-first (partial parser-supports-format? top-level-content))
                 :parser)))]
    (or candidate
        (throw (IllegalArgumentException.
                (str "Cannot determine the coverage report format for " file-path
                     ". Supported coverage formats are: "
                     supported-formats))))))

(defn make-parser-based-on-format-in
  "Creates a parser for a coverage report whose format is not known up front.

   A file with the extension .xml is inspected as XML to deduce its format; any
   other file is treated as pure text and dispatched on its extension."
  [an-unknown-coverage-report-file]
  (if (= ".xml" (patterns/extension-of an-unknown-coverage-report-file))
    (make-xml-based-parser-for an-unknown-coverage-report-file)
    (make-text-based-parser-for an-unknown-coverage-report-file)))