Compare commits
2 Commits
d9cf6f02df
...
d15c113854
Author | SHA1 | Date |
---|---|---|
Ashik K | d15c113854 | |
Ashik K | b3a33bb137 |
|
@ -1,2 +1,3 @@
|
||||||
/build/
|
/build/
|
||||||
/build/*
|
/build/*
|
||||||
|
.vscode/c_cpp_properties.json
|
||||||
|
|
112
fetcher.cc
112
fetcher.cc
|
@ -1,83 +1,83 @@
|
||||||
#include <sstream>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include <curlpp/cURLpp.hpp>
|
|
||||||
#include <curlpp/Easy.hpp>
|
#include <curlpp/Easy.hpp>
|
||||||
#include <curlpp/Options.hpp>
|
|
||||||
#include <curlpp/Exception.hpp>
|
#include <curlpp/Exception.hpp>
|
||||||
|
#include <curlpp/Options.hpp>
|
||||||
|
#include <curlpp/cURLpp.hpp>
|
||||||
|
|
||||||
#define DEBUG_LEVEL -1
|
#define DEBUG_LEVEL -1
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[]) {
|
||||||
{
|
|
||||||
std::string exbid, apikey;
|
std::string exbid, apikey;
|
||||||
if (argc !=7 ) {
|
if (argc != 7) {
|
||||||
std::cout<<"Usage: "<<argv[0] <<" -e <exhibition id> -k <api key> -e <number of records>"<<std::endl;
|
std::cout << "Usage: " << argv[0]
|
||||||
|
<< " -e <exhibition id> -k <api key> -e <number of records>"
|
||||||
|
<< std::endl;
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
int c;
|
int c;
|
||||||
int numrec = 0;
|
int numrec = 0;
|
||||||
while ((c = getopt (argc, argv, "e:k:n:")) != -1)
|
while ((c = getopt(argc, argv, "e:k:n:")) != -1)
|
||||||
switch(c)
|
switch (c) {
|
||||||
{
|
case 'e':
|
||||||
case 'e':
|
exbid = optarg;
|
||||||
exbid = optarg;
|
break;
|
||||||
break;
|
case 'k':
|
||||||
case 'k':
|
apikey = optarg;
|
||||||
apikey = optarg;
|
break;
|
||||||
break;
|
case 'n':
|
||||||
case 'n':
|
numrec = atoi(optarg);
|
||||||
numrec = atoi(optarg);
|
break;
|
||||||
break;
|
default:
|
||||||
default:
|
break;
|
||||||
break;
|
}
|
||||||
}
|
std::cout << numrec << " records to fetch\n";
|
||||||
std::cout<<numrec<<" records to fetch\n";
|
|
||||||
int numfetches = (numrec / 100) + 1;
|
int numfetches = (numrec / 100) + 1;
|
||||||
char url[256], outfilename[64];
|
char url[256], outfilename[64];
|
||||||
for (int i = 0; i<numfetches; i++) {
|
for (int i = 0; i < numfetches; i++) {
|
||||||
snprintf(
|
snprintf(url, sizeof url,
|
||||||
url,
|
"https://api.dimu.org/api/solr/"
|
||||||
sizeof url,
|
"select?q=Kosta&wt=json&fq=(artifact.exhibitionUids:\"%s\")&start="
|
||||||
"https://api.dimu.org/api/solr/select?q=Kosta&wt=json&fq=(artifact.exhibitionUids:\"%s\")&start=%d&rows=100&api.key=%s", exbid.c_str(), i*100, apikey.c_str());
|
"%d&rows=100&api.key=%s",
|
||||||
|
exbid.c_str(), i * 100, apikey.c_str());
|
||||||
|
|
||||||
std::cout<<url<<"\n";
|
std::cout << url << "\n";
|
||||||
|
|
||||||
snprintf(outfilename, sizeof outfilename, "data_%d.json", i);
|
snprintf(outfilename, sizeof outfilename, "data_%d.json", i);
|
||||||
|
|
||||||
if (DEBUG_LEVEL > 0) {
|
|
||||||
std::cout<<"url to fetch is "<<std::endl<<url<<std::endl;
|
|
||||||
std::cout<<"writing output to "<<outfilename<<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
curlpp::Cleanup cleaner;
|
|
||||||
curlpp::Easy request1;
|
|
||||||
|
|
||||||
// Setting the URL to retrive.
|
if (DEBUG_LEVEL > 0) {
|
||||||
request1.setOpt(new curlpp::options::Url(url));
|
std::cout << "url to fetch is " << std::endl << url << std::endl;
|
||||||
|
std::cout << "writing output to " << outfilename << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
if (DEBUG_LEVEL > 0) std::cout << request1 << std::endl;
|
try {
|
||||||
|
curlpp::Cleanup cleaner;
|
||||||
|
curlpp::Easy request1;
|
||||||
|
|
||||||
std::ofstream outfile;
|
// Setting the URL to retrive.
|
||||||
outfile.open (outfilename);
|
request1.setOpt(new curlpp::options::Url(url));
|
||||||
outfile << curlpp::options::Url(url) << std::endl;
|
|
||||||
outfile.close();
|
if (DEBUG_LEVEL > 0)
|
||||||
|
std::cout << request1 << std::endl;
|
||||||
|
|
||||||
|
std::ofstream outfile;
|
||||||
|
outfile.open(outfilename);
|
||||||
|
outfile << curlpp::options::Url(url) << std::endl;
|
||||||
|
outfile.close();
|
||||||
|
} catch (curlpp::LogicError &e) {
|
||||||
|
if (DEBUG_LEVEL > 0)
|
||||||
|
std::cout << e.what() << std::endl;
|
||||||
|
} catch (curlpp::RuntimeError &e) {
|
||||||
|
if (DEBUG_LEVEL > 0)
|
||||||
|
std::cout << e.what() << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch ( curlpp::LogicError & e ) {
|
return 0;
|
||||||
if (DEBUG_LEVEL > 0) std::cout << e.what() << std::endl;
|
|
||||||
}
|
|
||||||
catch ( curlpp::RuntimeError & e ) {
|
|
||||||
if (DEBUG_LEVEL > 0) std::cout << e.what() << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
429
parser.cc
429
parser.cc
|
@ -1,8 +1,8 @@
|
||||||
#include <rapidjson/document.h>
|
#include <rapidjson/document.h>
|
||||||
#include <rapidjson/filereadstream.h>
|
#include <rapidjson/filereadstream.h>
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
@ -12,45 +12,46 @@
|
||||||
|
|
||||||
#define debug_level 0
|
#define debug_level 0
|
||||||
std::string yearstr(int year) {
|
std::string yearstr(int year) {
|
||||||
return year==-1? "Unknown": std::to_string(year);
|
return year == -1 ? "Unknown" : std::to_string(year);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv) {
|
||||||
{
|
|
||||||
int c;
|
int c;
|
||||||
bool download_mode =false;
|
bool download_mode = false;
|
||||||
int numfiles = 0;
|
int numfiles = 0;
|
||||||
while ((c = getopt (argc, argv, "d:n:")) != -1)
|
while ((c = getopt(argc, argv, "d:n:")) != -1)
|
||||||
switch(c)
|
switch (c) {
|
||||||
{
|
case 'd': // download mode
|
||||||
case 'd': // download mode
|
download_mode = true;
|
||||||
download_mode = true;
|
break;
|
||||||
break;
|
case 'n': // number of files to process
|
||||||
case 'n': // number of files to process
|
numfiles = atoi(optarg);
|
||||||
numfiles = atoi(optarg);
|
break;
|
||||||
break;
|
default:
|
||||||
default:
|
break;
|
||||||
break;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
char filename[64];
|
char filename[64];
|
||||||
snprintf(filename, sizeof filename, "out.csv");
|
snprintf(filename, sizeof filename, "out.csv");
|
||||||
std::ofstream out_file1;
|
std::ofstream out_file1;
|
||||||
out_file1.open(filename);
|
out_file1.open(filename);
|
||||||
out_file1<<"id, Caption/title, production start year, end year, Description, Item url, Image Source, image_filename, subjects, date published, collection name, museum name, exif_model, exif_iso, exif_focallength, exif_exposuretime, exif_aperture, exif_datetimeoriginal, liceses\n";
|
out_file1 << "id, Caption/title, production start year, end year, "
|
||||||
for (auto i = 0; i<numfiles; i++) {
|
"Description, Item url, Image Source, image_filename, subjects, "
|
||||||
|
"date published, collection name, museum name, exif_model, "
|
||||||
|
"exif_iso, exif_focallength, exif_exposuretime, exif_aperture, "
|
||||||
|
"exif_datetimeoriginal, liceses\n";
|
||||||
|
for (auto i = 0; i < numfiles; i++) {
|
||||||
char in_file_i[64];
|
char in_file_i[64];
|
||||||
snprintf(in_file_i, sizeof in_file_i, "data_%d.json", i);
|
snprintf(in_file_i, sizeof in_file_i, "data_%d.json", i);
|
||||||
// Open the file
|
// Open the file
|
||||||
FILE* fp = fopen(in_file_i, "rb");
|
FILE *fp = fopen(in_file_i, "rb");
|
||||||
// Check if the file was opened successfully
|
// Check if the file was opened successfully
|
||||||
if (!fp) {
|
if (!fp) {
|
||||||
std::cerr << "Error: unable to open file"
|
std::cerr << "Error: unable to open file" << std::endl;
|
||||||
<< std::endl;
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the file
|
// Read the file
|
||||||
char readBuffer[192000];
|
char readBuffer[192000];
|
||||||
rapidjson::FileReadStream is(fp, readBuffer, sizeof(readBuffer));
|
rapidjson::FileReadStream is(fp, readBuffer, sizeof(readBuffer));
|
||||||
|
|
||||||
|
@ -60,8 +61,7 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
// Check if the document is valid
|
// Check if the document is valid
|
||||||
if (doc.HasParseError()) {
|
if (doc.HasParseError()) {
|
||||||
std::cerr << "Error: failed to parse JSON document"
|
std::cerr << "Error: failed to parse JSON document" << std::endl;
|
||||||
<< std::endl;
|
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -69,16 +69,14 @@ int main(int argc, char **argv)
|
||||||
// Close the file
|
// Close the file
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
|
|
||||||
if (doc.HasMember("response")
|
if (doc.HasMember("response") && doc["response"].IsObject()) {
|
||||||
&& doc["response"].IsObject()) {
|
const rapidjson::Value &response = doc["response"];
|
||||||
const rapidjson::Value& response = doc["response"];
|
if (response.HasMember("docs") && response["docs"].IsArray()) {
|
||||||
if (response.HasMember("docs")
|
|
||||||
&& response["docs"].IsArray()) {
|
|
||||||
for (rapidjson::SizeType i = 0; i < response["docs"].Size(); i++) {
|
for (rapidjson::SizeType i = 0; i < response["docs"].Size(); i++) {
|
||||||
//std::cout<<i<<std::endl;
|
// std::cout<<i<<std::endl;
|
||||||
int yearb = -1, yeare = -1;
|
int yearb = -1, yeare = -1;
|
||||||
std::string title = "";
|
std::string title = "";
|
||||||
const rapidjson::Value& article = response["docs"][i];
|
const rapidjson::Value &article = response["docs"][i];
|
||||||
|
|
||||||
std::string article_id = "";
|
std::string article_id = "";
|
||||||
if (article.HasMember("identifier.id")) {
|
if (article.HasMember("identifier.id")) {
|
||||||
|
@ -86,204 +84,221 @@ int main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (article.HasMember("artifact.ingress.title")) {
|
if (article.HasMember("artifact.ingress.title")) {
|
||||||
title = article["artifact.ingress.title"].GetString();
|
title = article["artifact.ingress.title"].GetString();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (article.HasMember("artifact.ingress.production.fromYear")) {
|
if (article.HasMember("artifact.ingress.production.fromYear")) {
|
||||||
const rapidjson::Value& yearbval = article["artifact.ingress.production.fromYear"];
|
const rapidjson::Value &yearbval =
|
||||||
|
article["artifact.ingress.production.fromYear"];
|
||||||
yearb = yearbval.GetInt();
|
yearb = yearbval.GetInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (article.HasMember("artifact.ingress.production.toYear")) {
|
if (article.HasMember("artifact.ingress.production.toYear")) {
|
||||||
const rapidjson::Value& yeareval = article["artifact.ingress.production.toYear"];
|
const rapidjson::Value &yeareval =
|
||||||
|
article["artifact.ingress.production.toYear"];
|
||||||
yeare = yeareval.GetInt();
|
yeare = yeareval.GetInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string unique_id = "";
|
std::string unique_id = "";
|
||||||
if (article.HasMember("artifact.uniqueId")) {
|
if (article.HasMember("artifact.uniqueId")) {
|
||||||
const rapidjson::Value& uniqidval = article["artifact.uniqueId"];
|
const rapidjson::Value &uniqidval = article["artifact.uniqueId"];
|
||||||
if (uniqidval.IsString()) {
|
if (uniqidval.IsString()) {
|
||||||
unique_id = uniqidval.GetString();
|
unique_id = uniqidval.GetString();
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string mediaid = "";
|
|
||||||
int picid = -1;
|
|
||||||
std::string picdim = "";
|
|
||||||
|
|
||||||
if (article.HasMember("artifact.hasPictures") && article["artifact.hasPictures"].GetBool() == true) {
|
|
||||||
|
|
||||||
if (article.HasMember("artifact.defaultMediaIdentifier")) {
|
|
||||||
const rapidjson::Value& mediaidv = article["artifact.defaultMediaIdentifier"];
|
|
||||||
if (mediaidv.IsString()) {
|
|
||||||
mediaid = mediaidv.GetString();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (article.HasMember("artifact.defaultPictureIndex") && article["artifact.defaultPictureIndex"].IsInt()) {
|
std::string mediaid = "";
|
||||||
picid = article["artifact.defaultPictureIndex"].GetInt();
|
int picid = -1;
|
||||||
}
|
std::string picdim = "";
|
||||||
|
|
||||||
if (article.HasMember("artifact.defaultPictureDimension") && article["artifact.defaultPictureDimension"].IsString()) {
|
if (article.HasMember("artifact.hasPictures") &&
|
||||||
picdim = article["artifact.defaultPictureDimension"].GetString();
|
article["artifact.hasPictures"].GetBool() == true) {
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// image file is at https://mm.dimu.org/image/<identifier>?dimension=<dim>
|
|
||||||
// as per documentation available at http://api.dimu.org/doc/public_api.html
|
|
||||||
char imglink[128];
|
|
||||||
snprintf(imglink, sizeof imglink, "https://mm.dimu.org/image/%s", mediaid.c_str());
|
|
||||||
|
|
||||||
char itemlink[128];
|
if (article.HasMember("artifact.defaultMediaIdentifier")) {
|
||||||
snprintf(itemlink, sizeof itemlink, "https://digitaltmuseum.se/%s", unique_id.c_str());
|
const rapidjson::Value &mediaidv =
|
||||||
|
article["artifact.defaultMediaIdentifier"];
|
||||||
|
if (mediaidv.IsString()) {
|
||||||
|
mediaid = mediaidv.GetString();
|
||||||
char descfilename[64];
|
|
||||||
snprintf(descfilename, sizeof descfilename, "%s.txt", article_id.c_str());
|
|
||||||
|
|
||||||
std::string description = "";
|
|
||||||
if (article.HasMember("artifact.ingress.description")) {
|
|
||||||
const rapidjson::Value& descv = article["artifact.ingress.description"];
|
|
||||||
if (descv.IsString()) {
|
|
||||||
description = descv.GetString();
|
|
||||||
std::replace( description.begin(), description.end(), ',', ':');
|
|
||||||
std::replace( description.begin(), description.end(), '\n', ' ');
|
|
||||||
std::ofstream desc_file;
|
|
||||||
desc_file.open(descfilename);
|
|
||||||
desc_file<<description;
|
|
||||||
desc_file.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string subjects;
|
|
||||||
if (article.HasMember("artifact.ingress.subjects")) {
|
|
||||||
const rapidjson::Value& subjv = article["artifact.ingress.subjects"];
|
|
||||||
if (subjv.IsArray()) {
|
|
||||||
for (auto i = 0; i<subjv.GetArray().Size(); i++) {
|
|
||||||
subjects += subjv[i].GetString();
|
|
||||||
subjects += " ";
|
|
||||||
}
|
|
||||||
std::replace( description.begin(), description.end(), ',', ':');
|
|
||||||
std::replace( description.begin(), description.end(), '\n', ' ');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string publishdate;
|
|
||||||
if (article.HasMember("artifact.publishedDate")) {
|
|
||||||
const rapidjson::Value& pubdatev = article["artifact.publishedDate"];
|
|
||||||
if (pubdatev.IsString()) {
|
|
||||||
publishdate = pubdatev.GetString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string license = "";
|
|
||||||
if (article.HasMember("artifact.ingress.license")) {
|
|
||||||
if (article[ "artifact.ingress.license"].IsString())
|
|
||||||
license = article[ "artifact.ingress.license"].GetString();
|
|
||||||
else if (article[ "artifact.ingress.license"].IsArray()) {
|
|
||||||
for (auto i = 0; i<article[ "artifact.ingress.license"].Size(); i++) {
|
|
||||||
license += article[ "artifact.ingress.license"][i].GetString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_cc_license = false;
|
|
||||||
if (license.find("CC by")!= std::string::npos) {
|
|
||||||
is_cc_license = true;
|
|
||||||
char imgfetch[256];
|
|
||||||
snprintf(imgfetch, sizeof imgfetch, "wget %s -O \"%s-%s.jpeg\"", imglink, article_id.c_str(), mediaid.c_str());
|
|
||||||
if (debug_level > 0) std::cout<<imgfetch<<std::endl;
|
|
||||||
if (download_mode) {
|
|
||||||
std::cout<<"running in download mode\n";
|
|
||||||
system (imgfetch);
|
|
||||||
if (debug_level >0) std::cout<<"Found CC by license, fetched the image for "<< mediaid<<"\n";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!download_mode) {
|
|
||||||
if (is_cc_license) {
|
|
||||||
char exif_file[128];
|
|
||||||
snprintf(exif_file, sizeof exif_file, "%s-%s.jpeg.exif.json", article_id.c_str(), mediaid.c_str());
|
|
||||||
// Open the file
|
|
||||||
FILE* exiffp = fopen(exif_file, "rb");
|
|
||||||
if (!exiffp) {
|
|
||||||
std::cerr << "Error: unable to open file" << std::string(exif_file)
|
|
||||||
<< std::endl;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
char exifreadBuffer[4096];
|
|
||||||
rapidjson::FileReadStream exifis(exiffp, exifreadBuffer, sizeof(exifreadBuffer));
|
|
||||||
rapidjson::Document exifdoc;
|
|
||||||
exifdoc.ParseStream(exifis);
|
|
||||||
|
|
||||||
// Check if the document is valid
|
|
||||||
if (exifdoc.HasParseError()) {
|
|
||||||
std::cerr << "Error: failed to parse JSON document exif data"
|
|
||||||
<< std::endl;
|
|
||||||
}
|
|
||||||
fclose(exiffp);
|
|
||||||
|
|
||||||
std::string exif_model, exif_iso, exif_focallength, exif_exposuretime, exif_aperture, exif_fnumber, exif_datetimeoriginal;
|
|
||||||
if (exifdoc.IsArray()) {
|
|
||||||
if (exifdoc[0].IsObject()) {
|
|
||||||
if (exifdoc[0].GetObject().HasMember("Model")) {
|
|
||||||
exif_model = exifdoc[0].GetObject()["Model"].GetString();
|
|
||||||
}
|
|
||||||
if (exifdoc[0].GetObject().HasMember("ISO")) {
|
|
||||||
exif_iso = std::to_string(exifdoc[0].GetObject()["ISO"].GetInt());
|
|
||||||
}
|
|
||||||
if (exifdoc[0].GetObject().HasMember("FocalLength")) {
|
|
||||||
exif_focallength = exifdoc[0].GetObject()["FocalLength"].GetString();
|
|
||||||
}
|
|
||||||
if (exifdoc[0].GetObject().HasMember("ExposureTime")) {
|
|
||||||
if (exifdoc[0].GetObject()["ExposureTime"].IsString())
|
|
||||||
exif_exposuretime = exifdoc[0].GetObject()["ExposureTime"].GetString();
|
|
||||||
}
|
|
||||||
if (exifdoc[0].GetObject().HasMember("ApertureValue")) {
|
|
||||||
exif_aperture = std::to_string(exifdoc[0].GetObject()["ApertureValue"].GetDouble());
|
|
||||||
}
|
|
||||||
if (exifdoc[0].GetObject().HasMember("DateTimeOriginal")) {
|
|
||||||
exif_datetimeoriginal = exifdoc[0].GetObject()["DateTimeOriginal"].GetString();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (article.HasMember("artifact.defaultPictureIndex") &&
|
||||||
|
article["artifact.defaultPictureIndex"].IsInt()) {
|
||||||
|
picid = article["artifact.defaultPictureIndex"].GetInt();
|
||||||
|
}
|
||||||
|
|
||||||
out_file1<<
|
if (article.HasMember("artifact.defaultPictureDimension") &&
|
||||||
/* article["artifact.defaultMediaIdentifier"].GetString()<<
|
article["artifact.defaultPictureDimension"].IsString()) {
|
||||||
", "<< article["artifact.defaultPictureIndex"].GetInt()<<
|
picdim = article["artifact.defaultPictureDimension"].GetString();
|
||||||
", "<< article["artifact.defaultPictureDimension"].GetString()<<
|
}
|
||||||
", "<<*/
|
}
|
||||||
article_id <<
|
|
||||||
", "<< title <<
|
// image file is at
|
||||||
", "<< yearstr(yearb) <<
|
// https://mm.dimu.org/image/<identifier>?dimension=<dim>
|
||||||
", "<< yearstr(yeare) <<
|
// as per documentation available at
|
||||||
", "<< description <<
|
// http://api.dimu.org/doc/public_api.html
|
||||||
", "<< itemlink <<
|
char imglink[128];
|
||||||
", "<< imglink <<
|
snprintf(imglink, sizeof imglink, "https://mm.dimu.org/image/%s",
|
||||||
", "<< article_id+"-"+ mediaid +".jpeg" <<
|
mediaid.c_str());
|
||||||
", "<< subjects <<
|
|
||||||
", "<< publishdate <<
|
char itemlink[128];
|
||||||
", "<<"Länge leve Kosta! exhibition" <<
|
snprintf(itemlink, sizeof itemlink, "https://digitaltmuseum.se/%s",
|
||||||
", "<<"Kulturparken Småland / Smålands museum" <<
|
unique_id.c_str());
|
||||||
", "<< exif_model <<
|
|
||||||
", "<< exif_iso <<
|
char descfilename[64];
|
||||||
", "<< exif_focallength <<
|
snprintf(descfilename, sizeof descfilename, "%s.txt",
|
||||||
", "<< exif_exposuretime <<
|
article_id.c_str());
|
||||||
", "<< exif_aperture <<
|
|
||||||
", "<< exif_datetimeoriginal<<
|
std::string description = "";
|
||||||
", "<< license <<
|
if (article.HasMember("artifact.ingress.description")) {
|
||||||
std::endl;
|
const rapidjson::Value &descv =
|
||||||
|
article["artifact.ingress.description"];
|
||||||
|
if (descv.IsString()) {
|
||||||
|
description = descv.GetString();
|
||||||
|
std::replace(description.begin(), description.end(), ',', ':');
|
||||||
|
std::replace(description.begin(), description.end(), '\n', ' ');
|
||||||
|
std::ofstream desc_file;
|
||||||
|
desc_file.open(descfilename);
|
||||||
|
desc_file << description;
|
||||||
|
desc_file.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string subjects;
|
||||||
|
if (article.HasMember("artifact.ingress.subjects")) {
|
||||||
|
const rapidjson::Value &subjv =
|
||||||
|
article["artifact.ingress.subjects"];
|
||||||
|
if (subjv.IsArray()) {
|
||||||
|
for (auto i = 0; i < subjv.GetArray().Size(); i++) {
|
||||||
|
subjects += subjv[i].GetString();
|
||||||
|
subjects += " ";
|
||||||
|
}
|
||||||
|
std::replace(description.begin(), description.end(), ',', ':');
|
||||||
|
std::replace(description.begin(), description.end(), '\n', ' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string publishdate;
|
||||||
|
if (article.HasMember("artifact.publishedDate")) {
|
||||||
|
const rapidjson::Value &pubdatev =
|
||||||
|
article["artifact.publishedDate"];
|
||||||
|
if (pubdatev.IsString()) {
|
||||||
|
publishdate = pubdatev.GetString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string license = "";
|
||||||
|
if (article.HasMember("artifact.ingress.license")) {
|
||||||
|
if (article["artifact.ingress.license"].IsString())
|
||||||
|
license = article["artifact.ingress.license"].GetString();
|
||||||
|
else if (article["artifact.ingress.license"].IsArray()) {
|
||||||
|
for (auto i = 0; i < article["artifact.ingress.license"].Size();
|
||||||
|
i++) {
|
||||||
|
license += article["artifact.ingress.license"][i].GetString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_cc_license = false;
|
||||||
|
if (license.find("CC by") != std::string::npos) {
|
||||||
|
is_cc_license = true;
|
||||||
|
char imgfetch[256];
|
||||||
|
snprintf(imgfetch, sizeof imgfetch, "wget %s -O \"%s-%s.jpeg\"",
|
||||||
|
imglink, article_id.c_str(), mediaid.c_str());
|
||||||
|
if (debug_level > 0)
|
||||||
|
std::cout << imgfetch << std::endl;
|
||||||
|
if (download_mode) {
|
||||||
|
std::cout << "running in download mode\n";
|
||||||
|
system(imgfetch);
|
||||||
|
if (debug_level > 0)
|
||||||
|
std::cout << "Found CC by license, fetched the image for "
|
||||||
|
<< mediaid << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!download_mode) {
|
||||||
|
if (is_cc_license) {
|
||||||
|
char exif_file[128];
|
||||||
|
snprintf(exif_file, sizeof exif_file, "%s-%s.jpeg.exif.json",
|
||||||
|
article_id.c_str(), mediaid.c_str());
|
||||||
|
// Open the file
|
||||||
|
FILE *exiffp = fopen(exif_file, "rb");
|
||||||
|
if (!exiffp) {
|
||||||
|
std::cerr << "Error: unable to open file"
|
||||||
|
<< std::string(exif_file) << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char exifreadBuffer[4096];
|
||||||
|
rapidjson::FileReadStream exifis(exiffp, exifreadBuffer,
|
||||||
|
sizeof(exifreadBuffer));
|
||||||
|
rapidjson::Document exifdoc;
|
||||||
|
exifdoc.ParseStream(exifis);
|
||||||
|
|
||||||
|
// Check if the document is valid
|
||||||
|
if (exifdoc.HasParseError()) {
|
||||||
|
std::cerr << "Error: failed to parse JSON document exif data"
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
fclose(exiffp);
|
||||||
|
|
||||||
|
std::string exif_model, exif_iso, exif_focallength,
|
||||||
|
exif_exposuretime, exif_aperture, exif_fnumber,
|
||||||
|
exif_datetimeoriginal;
|
||||||
|
if (exifdoc.IsArray()) {
|
||||||
|
if (exifdoc[0].IsObject()) {
|
||||||
|
if (exifdoc[0].GetObject().HasMember("Model")) {
|
||||||
|
exif_model = exifdoc[0].GetObject()["Model"].GetString();
|
||||||
|
}
|
||||||
|
if (exifdoc[0].GetObject().HasMember("ISO")) {
|
||||||
|
exif_iso =
|
||||||
|
std::to_string(exifdoc[0].GetObject()["ISO"].GetInt());
|
||||||
|
}
|
||||||
|
if (exifdoc[0].GetObject().HasMember("FocalLength")) {
|
||||||
|
exif_focallength =
|
||||||
|
exifdoc[0].GetObject()["FocalLength"].GetString();
|
||||||
|
}
|
||||||
|
if (exifdoc[0].GetObject().HasMember("ExposureTime")) {
|
||||||
|
if (exifdoc[0].GetObject()["ExposureTime"].IsString())
|
||||||
|
exif_exposuretime =
|
||||||
|
exifdoc[0].GetObject()["ExposureTime"].GetString();
|
||||||
|
}
|
||||||
|
if (exifdoc[0].GetObject().HasMember("ApertureValue")) {
|
||||||
|
exif_aperture = std::to_string(
|
||||||
|
exifdoc[0].GetObject()["ApertureValue"].GetDouble());
|
||||||
|
}
|
||||||
|
if (exifdoc[0].GetObject().HasMember("DateTimeOriginal")) {
|
||||||
|
exif_datetimeoriginal =
|
||||||
|
exifdoc[0].GetObject()["DateTimeOriginal"].GetString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out_file1 <<
|
||||||
|
/* article["artifact.defaultMediaIdentifier"].GetString()<<
|
||||||
|
", "<< article["artifact.defaultPictureIndex"].GetInt()<<
|
||||||
|
", "<<
|
||||||
|
article["artifact.defaultPictureDimension"].GetString()<<
|
||||||
|
", "<<*/
|
||||||
|
article_id
|
||||||
|
<< ", " << title << ", " << yearstr(yearb) << ", "
|
||||||
|
<< yearstr(yeare) << ", " << description << ", "
|
||||||
|
<< itemlink << ", " << imglink << ", "
|
||||||
|
<< article_id + "-" + mediaid + ".jpeg"
|
||||||
|
<< ", " << subjects << ", " << publishdate << ", "
|
||||||
|
<< "Länge leve Kosta! exhibition"
|
||||||
|
<< ", "
|
||||||
|
<< "Kulturparken Småland / Smålands museum"
|
||||||
|
<< ", " << exif_model << ", " << exif_iso << ", "
|
||||||
|
<< exif_focallength << ", " << exif_exposuretime << ", "
|
||||||
|
<< exif_aperture << ", " << exif_datetimeoriginal
|
||||||
|
<< ", " << license << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out_file1.close();
|
out_file1.close();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue