[go: up one dir, main page]

File: feed.hpp

package info (click to toggle)
syndication-domination 1.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 408 kB
  • sloc: cpp: 1,101; makefile: 42; sh: 10
file content (142 lines) | stat: -rw-r--r-- 4,036 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#pragma once

#include <pugixml.hpp>
#include <string>
#include <utility>
#include <vector>

#include "utils.hpp"
#include "feed_type.hpp"
#include "feed_item.hpp"
#include "extraction_param.hpp"

// Lets the declarations below name pugixml types (xml_document, xml_node)
// without the pugi:: qualifier.
// NOTE(review): a using-directive at header scope also takes effect in every
// file that includes this header.
using namespace pugi;

/**
* Represents an RSS/Atom feed.
*
* Upon construction it will try its best to parse useful information out of
* the provided file.
*
* In case some value cannot be found, it will just contain an empty string.
*
* All accessors are const-qualified and return copies, so they can be used
* through a `const Feed&`.
*/
class Feed {
private:

    /** Path of the feed file, as handed to the constructor. */
    std::string path;
    /** pugixml document backing this feed. */
    xml_document doc;
    /** Node containing the feed; see get_feed_node(). */
    xml_node feed_node;

    /** Feed type; stays INVALID until verify_feed() sets it. */
    FeedType type{FeedType::INVALID};

    // Extracted feed metadata, exposed through the public getters below.
    std::string title;
    std::string description;
    std::string url;
    std::string last_update;
    std::string img_url;
    std::string rss_url;

    /** Items (articles, posts) found in the feed. */
    std::vector<FeedItem> feed_items{};

    /**
    * Verifies if the feed is a valid RSS or Atom file; also sets the type
    * variable.
    */
    bool verify_feed();

    /**
    * Returns the xml_node containing the feed, which is `channel` for RSS
    * or the root node for Atom.
    */
    xml_node get_feed_node();

    /**
    * Makes relative urls absolute and/or adds the protocol if not present.
    * This fixing is done in place on the string passed.
    *
    * @param s the url to fix; modified in place.
    */
    void fix_url(std::string &s);

    /**
    * Tries to extract the website url and returns it.
    */
    std::string extract_url();

    // NOTE(review): identifiers containing a double underscore are reserved
    // for the implementation in C++. The three `__*_PARAMS` names are kept
    // unchanged because the out-of-line member definitions presumably
    // reference them; rename them together with those definitions.

    /**
    * Candidate child nodes that may carry the feed's last update date.
    */
    static inline const std::vector<ExtractionParam> __LAST_UPDATE_PARAMS{
        {ExtractionParam::ParamType::CHILD, {"updated"}},
        {ExtractionParam::ParamType::CHILD, {"lastBuildDate"}},
        {ExtractionParam::ParamType::CHILD, {"pubDate"}},
        {ExtractionParam::ParamType::CHILD, {"dc:date"}}
    };
    /**
    * Tries to extract and return the last update date and time, alternatively
    * returns the current date and time.
    */
    std::string extract_last_update();

    /**
    * Candidate locations of the feed image: child nodes (including the
    * nested `image` -> `url` path) and the `href` attribute of
    * `itunes:image`.
    */
    static inline const std::vector<ExtractionParam> __IMG_URL_PARAMS{
        {ExtractionParam::ParamType::CHILD, {"icon"}},
        {ExtractionParam::ParamType::CHILD, {"image", "url"}},
        {ExtractionParam::ParamType::CHILD, {"image"}},
        {ExtractionParam::ParamType::CHILD, {"logo"}},
        {ExtractionParam::ParamType::ATTRIBUTE, {"itunes:image"}, "href"}
    };
    /**
    * Tries to extract an image url representing the feed (icon, logo...) and
    * returns it.
    */
    std::string extract_img_url();

    /**
    * Tries to extract the url to the same feed being parsed and returns it.
    * This is useful for updating the feed in the future.
    */
    std::string extract_rss_url();

    /**
    * Candidate child nodes that may carry the feed description.
    */
    static inline const std::vector<ExtractionParam> __DESCRIPTION_PARAMS{
        {ExtractionParam::ParamType::CHILD, {"description"}},
        {ExtractionParam::ParamType::CHILD, {"subtitle"}}
    };
    /**
    * Extracts all of the feed data and assigns the relevant internal
    * fields.
    */
    void extract_feed_data();

    /**
    * Extracts all of the items (articles, posts) in the feed and stores them
    * inside the `feed_items` vector as FeedItem objects.
    */
    void extract_feed_items();

    /**
    * Entry point of the class, parses all the relevant content. Called by
    * the constructor.
    */
    void parse();

public:

    /**
    * Constructs the Feed object from a valid RSS/Atom file path.
    * It will also automatically construct a vector of FeedItem objects
    * representing the feed items or articles found in the feed.
    *
    * @param path a valid file path to an RSS or Atom XML file.
    */
    // `path` is a by-value sink parameter: move it into the member instead
    // of copying the string a second time.
    Feed(std::string path) : path{std::move(path)} {
        parse();
    }

    /** @return a copy of the feed title. */
    std::string get_title() const { return title; }
    /** @return a copy of the feed description. */
    std::string get_description() const { return description; }
    /** @return a copy of the website url. */
    std::string get_url() const { return url; }
    /** @return a copy of the last update value. */
    std::string get_last_update() const { return last_update; }
    /** @return a copy of the feed image url. */
    std::string get_img_url() const { return img_url; }
    /** @return a copy of the url of the feed itself. */
    std::string get_rss_url() const { return rss_url; }

    /** @return a copy of the vector of items found in the feed. */
    std::vector<FeedItem> get_items() const { return feed_items; }

    /**
    * Represents the Feed object (itself) as a json, returned as a string.
    *
    * @param no_items presumably leaves the feed items out of the output
    *                 when true - TODO confirm against the implementation.
    */
    std::string to_json(bool no_items=false);
};