/**
 * @file dozeu_interface.hpp
 * @author Hajime Suzuki
 * @date 2018/03/23
 */
#ifndef VG_DOZEU_INTERFACE_HPP_INCLUDED
#define VG_DOZEU_INTERFACE_HPP_INCLUDED

#include <algorithm>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

#include <vg/vg.pb.h>
#include "types.hpp"
#include "handle.hpp"
#include "mem.hpp"

// #define BENCH
// #include "bench.h"

// forward declarations of dozeu structs
struct dz_s;
struct dz_forefront_s;
struct dz_query_s;
struct dz_alignment_s;

namespace vg {

static constexpr uint16_t default_xdrop_max_gap_length = 40;

/**
 * Align to a graph using the xdrop algorithm, as implemented in dozeu.
 *
 * The underlying Dozeu library is fundamentally based around semi-global
 * alignment: extending an alignment from a known matching position (what
 * in other parts of vg we call "pinned" alignment).
 *
 * To simulate non-pinned alignment, we align in two passes in different
 * directions. One from a guess of a pinning position, to get a more
 * accurate "head" pinning position for the other end, and once back from
 * where the previous pass ended up, to get an overall hopefully-optimal
 * alignment.
 *
 * If the input graph is not reverse-complemented, direction = false
 * (reverse, right to left) on the first pass, and direction = true
 * (forward, left to right) on the second. If it is reverse complemented,
 * we flip them.
 *
 * This won't actually work in theory to get the optimal local alignment in
 * all cases, but it works well in practice.
 *
 * This class maintains an internal dz_s, which is *NOT THREADSAFE*,
 * and non-const during alignments. However, it may be reused for
 * subsequent alignments.
 */
class DozeuInterface {
    
public:
    
    virtual ~DozeuInterface() = default;
    /**
     * align query: forward-backward banded alignment
     *
     * Compute an alignment of the given Alignment's sequence against the
     * given DAG, using (one of) the given MEMs to seed the alignment.
     *
     * reverse_complemented is true if the topologically sorted graph we
     * have was reverse-complemented when extracted from a larger
     * containing graph, and false if it is in the same orientation as it
     * exists in the larger containing graph. The MEMs and the Alignment
     * are interpreted as being against the forward strand of the passed
     * subgraph no matter the value of this setting.
     *
     * reverse_complemented true means we will compute the alignment
     * forward in the topologically-sorted order of the given graph
     * (anchoring to the first node if no MEMs are provided) and false if
     * we want to compute the alignment backward in the topological order
     * (anchoring to the last node).
     *
     * First the head (the most upstream) seed in MEMs is selected and
     * extended downward to detect the downstream breakpoint. Next the
     * alignment path is generated by second upward extension from the
     * downstream breakpoint.
     *
     * The MEM list may be empty. If MEMs are provided, uses only the
     * begin, end, and nodes fields of the MaximalExactMatch objects. It
     * uses the first occurrence of the last MEM if reverse_complemented is
     * true, and the last occurrence of the first MEM otherwise.
     */
    void align(Alignment& alignment, const HandleGraph& graph, const vector<MaximalExactMatch>& mems,
               bool reverse_complemented, int8_t full_length_bonus,
               uint16_t max_gap_length = default_xdrop_max_gap_length);
    
    /**
     * Same as above except using a precomputed topological order, which
     * need not include all handles in the graph, and which may contain both
     * orientations of a handle.
     */
    void align(Alignment& alignment, const HandleGraph& graph, const vector<handle_t>& order,
               const vector<MaximalExactMatch>& mems, bool reverse_complemented,
               int8_t full_length_bonus, uint16_t max_gap_length = default_xdrop_max_gap_length);
    
    /**
     * Compute a pinned alignment, where the start (pin_left=true) or end
     * (pin_left=false) end of the Alignment sequence is pinned to the
     * start of the first (pin_left=true) or end of the last
     * (pin_left=false) node in the graph's topological order.
     *
     * Does not account for multiple sources/sinks in the topological
     * order; whichever comes first/last ends up being used for the pin.
     */
    void align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left,
                      int8_t full_length_bonus, uint16_t max_gap_length = default_xdrop_max_gap_length);
    
    /**
     * Maximum number of bytes of Dozeu scratch space to retain permanently for each thread.
     */
    static constexpr size_t THREAD_MAX_RETAINED_BYTES = 2ULL * 1024 * 1024 * 1024;

protected:
    /**
     * Represents a correspondance between a position in the subgraph we are
     * mapping to and a position in the read we are mapping.
     */
    struct graph_pos_s {
        /// What index in the node list of our extracted subgraph is our node at?
        size_t node_index;
        /// What is the offset in the node? Note that we only think about the forward strand.
        uint32_t ref_offset;
        /// What is the correspondign offset in the query sequence?
        uint32_t query_offset;
    };
    
    /**
     * Represents a HandleGraph with a defined (topological) order calculated for it.
     */
    struct OrderedGraph {
        OrderedGraph(const HandleGraph& graph, const vector<handle_t>& order);
        void for_each_neighbor(const size_t i, bool go_left, const function<void(size_t)>& lambda) const;
        size_t size() const;
        
        const HandleGraph& graph;
        const vector<handle_t>& order;
        unordered_map<handle_t, size_t> index_of;
    };
    
    // wrappers for dozeu functions that can be used to toggle between between quality
    // adjusted and standard alignments
    virtual dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual,
                                           int8_t full_length_bonus, size_t len) = 0;
    virtual dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual,
                                           int8_t full_length_bonus, size_t len) = 0;
    virtual const dz_forefront_s* scan(const dz_query_s* query, const dz_forefront_s** forefronts,
                                       size_t n_forefronts, const char* ref, int32_t rlen, uint32_t rid,
                                       uint16_t xt) = 0;
    virtual const dz_forefront_s* extend(const dz_query_s* query, const dz_forefront_s** forefronts,
                                         size_t n_forefronts, const char* ref, int32_t rlen,
                                         uint32_t rid, uint16_t xt) = 0;
    virtual dz_alignment_s* trace(const dz_forefront_s* forefront) = 0;
    virtual void flush() = 0;
    
    /// Given the subgraph we are aligning to, the MEM hist against it, the
    /// length of the query, and the direction we are aligning the query in
    /// (true = forward), select a single anchoring match between the graph
    /// and the query to align out from.
    ///
    /// This replaces scan_seed_position for the case where we have MEMs.
    graph_pos_s calculate_seed_position(const OrderedGraph& graph, const vector<MaximalExactMatch>& mems,
                                        size_t query_length, bool direction) const;
    /// Given the index of the node at which the winning score occurs, find
    /// the position in the node and read sequence at which the winning
    /// match is found.
    graph_pos_s calculate_max_position(const OrderedGraph& graph, const graph_pos_s& seed_pos,
                                       size_t max_node_index, bool direction,
                                       const vector<const dz_forefront_s*>& forefronts);
    
    /// If no seeds are provided as alignment input, we need to compute our own starting anchor position. This function does that.
    /// Takes the topologically-sorted graph, the query sequence, and the direction.
    /// If direction is false, finds a seed hit on the first node of the graph. If it is true, finds a hit on the last node.
    ///
    /// This replaces calculate_seed_position for the case where we have no MEMs.
    ///
    /// The bool return with the position indicates whether the scan succeeded or failed.
    /// If the scan failed, then the alignment should not be attempted.
    pair<graph_pos_s, bool> scan_seed_position(const OrderedGraph& graph, const Alignment& alignment,
                                               bool direction, vector<const dz_forefront_s*>& forefronts,
                                               int8_t full_length_bonus, uint16_t max_gap_length);
    
    /// Append an edit at the end of the current mapping array.
    /// Returns the length passed in.
    size_t push_edit(Mapping *mapping, uint8_t op, const char* alt, size_t len) const;
    
    /// Do alignment. Takes the graph, the sorted packed edges in
    /// ascending order for a forward pass or descending order for a
    /// reverse pass, the packed query sequence, the index of the seed node
    /// in the graph, the offset (TODO: in the read?) of the seed position,
    /// and the direction to traverse the graph topological order.
    ///
    /// Note that we take our direction as right_to_left, whole many other
    /// functions take it as left_to_right.
    ///
    /// If a MEM seed is provided, this is run in two passes. The first is
    /// left to right (right_to_left = false) if align did not have
    /// reverse_complement set and the second is right to left (right_to_left =
    /// true).
    ///
    /// If we have no MEM seed, we only run one pass (the second one).
    ///
    /// Returns the index in the topological order of the node with the
    /// highest scoring alignment.
    ///
    /// Note that if no non-empty local alignment is found, it may not be
    /// safe to call dz_calc_max_qpos on the associated forefront!
    size_t do_poa(const OrderedGraph& graph, const dz_query_s* packed_query,
                  const vector<graph_pos_s>& seed_positions, bool right_to_left,
                  vector<const dz_forefront_s*>& forefronts, uint16_t);
    
    /**
     * After all the alignment work has been done, do the traceback and
     * save into the given Alignment object.
     *
     * If left_to_right is true, the nodes were filled left to right, and
     * the internal traceback will come out in left to right order, so we
     * can emit it as is. If it is false, the nodes were filled right to
     * left, and the internal traceback comes out in right to left order,
     * so we need to flip it.
     *
     * All Mappings in the Alignment other than the first will have a zero
     * offset in their Positions.
     */
    void calculate_and_save_alignment(Alignment& alignment, const OrderedGraph& graph,
                                      const vector<graph_pos_s>& head_positions,
                                      size_t tail_node_index, bool left_to_right,
                                      const vector<const dz_forefront_s*>& forefronts);
    
    // void debug_print(Alignment const &alignment, OrderedGraph const &graph, MaximalExactMatch const &seed, bool reverse_complemented);
    // bench_t bench;
    
    /// After doing the upward pass and finding head_pos to anchor from, do
    /// the downward alignment pass and traceback. If left_to_right is
    /// set, goes left to right and traces back the other way. If it is
    /// unset, goes right to left and traces back the other way.
    void align_downward(Alignment &alignment, const OrderedGraph& graph,
                        const vector<graph_pos_s>& head_positions,
                        bool left_to_right, vector<const dz_forefront_s*>& forefronts,
                        int8_t full_length_bonus, uint16_t max_gap_length);
    
    
    /// The core dozeu class, which does the alignments
    dz_s* dz = nullptr;
};

/*
 * A dozeu-backed X-drop aligner that does not use base qualities
 * to adjust alignment scores.
 */
class XdropAligner : public DozeuInterface {
public:
    
    /// Main constructor. Expects a 4 x 4 score matrix.
    XdropAligner(const int8_t* _score_matrix,
                 int8_t _gap_open,
                 int8_t _gap_extension);
    
    // see DozeuInterface::align and DozeuInterface::align_pinned below for alignment
    // interface
    
private:
    
    // implementations of virtual functions from DozeuInterface
    dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
    dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
    const dz_forefront_s* scan(const dz_query_s* query, const dz_forefront_s** forefronts,
                               size_t n_forefronts, const char* ref, int32_t rlen, uint32_t rid,
                               uint16_t xt);
    const dz_forefront_s* extend(const dz_query_s* query, const dz_forefront_s** forefronts,
                                 size_t n_forefronts, const char* ref, int32_t rlen,
                                 uint32_t rid, uint16_t xt);
    dz_alignment_s* trace(const dz_forefront_s* forefront);
    void flush();
    
public:
    
    XdropAligner() = default;
    ~XdropAligner(void);
    
    /// Copy constructor
    XdropAligner(const XdropAligner& other);
    /// Copy assignment
    XdropAligner& operator=(const XdropAligner& other);
    /// Move constructor
    XdropAligner(XdropAligner&& other);
    /// Move assignment
    XdropAligner& operator=(XdropAligner&& other);
};

/*
 * A dozeu-backed X-drop aligner that uses base qualities to adjust
 * alignment scores.
 */
class QualAdjXdropAligner : public DozeuInterface {
public:
    
    /// Main constructor. Expects a 4 x 4 score matrix and a 4 x 4 x 64 quality adjusted matrix
    QualAdjXdropAligner(const int8_t* _score_matrix,
                        const int8_t* _qual_adj_score_matrix,
                        int8_t _gap_open,
                        int8_t _gap_extension);
    
    
    // see DozeuInterface::align and DozeuInterface::align_pinned below for alignment
    // interface
    
private:
    
    // implementations of virtual functions from DozeuInterface
    dz_query_s* pack_query_forward(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
    dz_query_s* pack_query_reverse(const char* seq, const uint8_t* qual, int8_t full_length_bonus, size_t len);
    const dz_forefront_s* scan(const dz_query_s* query, const dz_forefront_s** forefronts,
                                       size_t n_forefronts, const char* ref, int32_t rlen, uint32_t rid,
                                       uint16_t xt);
    const dz_forefront_s* extend(const dz_query_s* query, const dz_forefront_s** forefronts,
                                         size_t n_forefronts, const char* ref, int32_t rlen,
                                         uint32_t rid, uint16_t xt);
    dz_alignment_s* trace(const dz_forefront_s* forefront);
    void flush();
    
public:
    
    QualAdjXdropAligner() = default;
    ~QualAdjXdropAligner(void);
    
    /// Copy constructor
    QualAdjXdropAligner(const QualAdjXdropAligner& other);
    /// Copy assignment
    QualAdjXdropAligner& operator=(const QualAdjXdropAligner& other);
    /// Move constructor
    QualAdjXdropAligner(QualAdjXdropAligner&& other);
    /// Move assignment
    QualAdjXdropAligner& operator=(QualAdjXdropAligner&& other);
};

} // end of namespace vg

#endif // VG_DOZEU_INTERFACE_HPP_INCLUDED
/**
 * end of dozeu_interface.hpp
 */
