98 lines
2.6 KiB
C++
98 lines
2.6 KiB
C++
/*
This header is meant to be a framework for projects where links must be pattern matched,
followed and downloaded from a site.
Further optimizations are required.
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <algorithm>
|
|
#include <regex>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <fstream>
|
|
#include <curl/curl.h>
|
|
|
|
// size_t store(char *buffer, size_t size, size_t n, void* v);
|
|
// std::string site_text(const char* site){
|
|
// template<typename T, typename S> size_t findall(T &t, S source, const char* pattern){
|
|
// template<typename T> size_t site_findall(T &t, const char* site, const char* pattern){
|
|
// template<typename T> size_t dllist(const T &t){
|
|
|
|
namespace bot{
|
|
|
|
// Regular expression (raw string literal) with a single capture group: the
// text between a quote character that follows `href=` and a later quote
// character on the same input.
// NOTE(review): every `.*` in this pattern is greedy, so on input carrying
// several quoted attributes the group may extend past the first closing
// quote -- confirm this is the intended behavior.
const char href[] = R"d(.*href=["'](.*)["'].*)d";
|
|
|
|
// ---- Forward declarations of everything this namespace offers ----

// Write callback in the shape libcurl expects for CURLOPT_WRITEFUNCTION.
// Appends the data in `buffer` to the std::string that `v` points to and
// returns size * n.
size_t store(char *buffer, size_t size, size_t n, void* v);

// Fetches `site` with libcurl and returns what the write callback collected.
// Throws the int value 1 when no curl handle can be created.
std::string site_text(const char* site);

// Runs the regular expression `pattern` over `source` and appends one
// captured string per match to `t` (via push_back).
// Returns t.size() after the appends.
template<typename T> size_t findall(T &t, const char* source, const char* pattern);

// Same operation for any `source` that can initialize a std::string
// (e.g. a std::string). This is the signature of the findall template that
// is defined further down in this header.
template<typename T, typename S> size_t findall(T &t, S source, const char* pattern);

// findall() applied to the text downloaded from `site`.
template<typename T> size_t site_findall(T &t, const char* site, const char* pattern);

// Downloads every entry of `t` (used as a URL) into a file named after the
// entry with each '/' replaced by '_'.
// Returns 0 when done, 1 when no curl handle can be created.
template<typename T> size_t dllist(const T &t);

// NOTE(review): only this declaration is visible; no definition appears in
// this part of the file. Presumably it downloads the entries of `t` as paths
// relative to `domain` -- confirm against the implementation before use.
template<typename T> size_t dlrellist(const std::string &domain, const T &t);
|
|
|
|
// libcurl write callback (the CURLOPT_WRITEFUNCTION signature).
//
//   buffer  received bytes; libcurl does not NUL-terminate this data, so it
//           must never be treated as a C string
//   size,n  the chunk holds size * n bytes
//   v       the CURLOPT_WRITEDATA pointer; must point to a std::string
//
// Appends exactly size * n bytes (embedded NUL bytes included) to *v and
// returns that count, which tells libcurl the whole chunk was consumed.
//
// Declared `inline` because the definition lives in a header: without it,
// including the header from two translation units defines the function twice.
inline size_t store(char *buffer, size_t size, size_t n, void *v){
    const size_t bytes = size * n;
    static_cast<std::string*>(v)->append(buffer, bytes);
    return bytes;
}
|
|
|
|
std::string site_text(const char* site){
|
|
std::string buffer;
|
|
|
|
auto c = curl_easy_init();
|
|
if(!c){ throw 1; }
|
|
curl_easy_setopt(c, CURLOPT_URL, site);
|
|
curl_easy_setopt(c, CURLOPT_WRITEFUNCTION, store);
|
|
curl_easy_setopt(c, CURLOPT_WRITEDATA, (void*)&buffer);
|
|
curl_easy_perform(c);
|
|
|
|
return buffer;
|
|
}
|
|
|
|
// Collects regex captures from a piece of text.
//
//   t        container with push_back() and size(); results are appended,
//            existing elements are kept
//   source   anything that can initialize a std::string
//   pattern  regular expression in std::regex's default grammar
//   returns  t.size() after the appends (so it includes earlier elements)
//
// For every non-overlapping match, capture group 1 is appended. A pattern
// without any capture group contributes the whole match instead, so such
// patterns no longer produce a list of empty strings.
// std::regex throws std::regex_error for an invalid pattern.
template<typename T, typename S> size_t findall(T &t, S source, const char* pattern){
    const std::string text = source;

    const std::regex re(pattern);
    // Group 0 is the entire match; use it only when the pattern has no groups.
    const size_t group = re.mark_count() > 0 ? 1 : 0;

    const std::sregex_iterator last;
    for(std::sregex_iterator it(text.begin(), text.end(), re); it != last; ++it){
        t.push_back((*it)[group]);
    }

    return t.size();
}

// C-string overload. For `const char*` arguments overload resolution prefers
// this signature over the generic one, so it needs a definition of its own.
// A null `source` is treated as empty text; everything else is forwarded to
// the generic version above (the explicit template arguments rule this
// overload out, so the call cannot recurse).
template<typename T> size_t findall(T &t, const char* source, const char* pattern){
    return findall<T, const char*>(t, source ? source : "", pattern);
}
|
|
|
|
template<typename T> size_t site_findall(T &t, const char* site, const char* pattern){
|
|
return findall(t, site_text(site).c_str(), pattern);
|
|
}
|
|
|
|
template<typename T> size_t dllist(const T &t){
|
|
std::ofstream output;
|
|
std::string buffer;
|
|
|
|
auto cd = curl_easy_init();
|
|
if(!cd){ return 1; }
|
|
|
|
for(auto i : t){
|
|
curl_easy_setopt(cd, CURLOPT_URL, i.c_str());
|
|
curl_easy_setopt(cd, CURLOPT_WRITEFUNCTION, store);
|
|
curl_easy_setopt(cd, CURLOPT_WRITEDATA, (void*)&buffer);
|
|
curl_easy_perform(cd);
|
|
|
|
std::string name;
|
|
name.resize(i.size());
|
|
std::replace_copy(i.begin(), i.end(), name.begin(), '/', '_');
|
|
|
|
if(i.size() != 0){
|
|
output.open(name);
|
|
output << buffer;
|
|
output.close();
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
}
|