From aa04d33e0284dd15d381d876df7269392aeb9c84 Mon Sep 17 00:00:00 2001 From: Greisberger Christophe Date: Fri, 9 Feb 2024 08:30:50 +0100 Subject: [PATCH] Enhancement: extend drogon::ContentType for file handling (#1921) --- lib/inc/drogon/HttpTypes.h | 69 +++- lib/src/HttpFileImpl.h | 3 + lib/src/HttpUtils.cc | 583 ++++++++++++++-------------- lib/src/HttpUtils.h | 9 + lib/tests/unittests/FileTypeTest.cc | 59 +++ 5 files changed, 417 insertions(+), 306 deletions(-) diff --git a/lib/inc/drogon/HttpTypes.h b/lib/inc/drogon/HttpTypes.h index a2f3af5c..63407fd4 100644 --- a/lib/inc/drogon/HttpTypes.h +++ b/lib/inc/drogon/HttpTypes.h @@ -106,26 +106,71 @@ enum ContentType CT_APPLICATION_X_JAVASCRIPT [[deprecated("use CT_TEXT_JAVASCRIPT")]], CT_TEXT_JAVASCRIPT, CT_TEXT_CSS, - CT_TEXT_XML, - CT_APPLICATION_XML, + CT_TEXT_CSV, + CT_TEXT_XML, // suggests human readable xml + CT_APPLICATION_XML, // suggest machine-to-machine xml CT_TEXT_XSL, CT_APPLICATION_WASM, CT_APPLICATION_OCTET_STREAM, - CT_APPLICATION_X_FONT_TRUETYPE, - CT_APPLICATION_X_FONT_OPENTYPE, CT_APPLICATION_FONT_WOFF, CT_APPLICATION_FONT_WOFF2, - CT_APPLICATION_VND_MS_FONTOBJ, + CT_APPLICATION_GZIP, + CT_APPLICATION_JAVA_ARCHIVE, CT_APPLICATION_PDF, - CT_IMAGE_SVG_XML, - CT_IMAGE_PNG, - CT_IMAGE_WEBP, + CT_APPLICATION_MSWORD, + CT_APPLICATION_MSWORDX, + CT_APPLICATION_VND_MS_FONTOBJ, + CT_APPLICATION_VND_RAR, + CT_APPLICATION_XHTML, + CT_APPLICATION_X_7Z, + CT_APPLICATION_X_BZIP, + CT_APPLICATION_X_BZIP2, + CT_APPLICATION_X_HTTPD_PHP, + CT_APPLICATION_X_FONT_TRUETYPE, + CT_APPLICATION_X_FONT_OPENTYPE, + CT_APPLICATION_X_TAR, + CT_APPLICATION_X_TGZ, + CT_APPLICATION_X_XZ, + CT_APPLICATION_ZIP, + CT_AUDIO_AAC, + CT_AUDIO_AC3, + CT_AUDIO_AIFF, + CT_AUDIO_FLAC, + CT_AUDIO_MATROSKA, + CT_AUDIO_MPEG, + CT_AUDIO_MPEG4, + CT_AUDIO_OGG, + CT_AUDIO_WAVE, + CT_AUDIO_WEBM, + CT_AUDIO_X_APE, + CT_AUDIO_X_MS_WMA, + CT_AUDIO_X_TTA, + CT_AUDIO_X_WAVPACK, + CT_IMAGE_APNG, CT_IMAGE_AVIF, - CT_IMAGE_JPG, - CT_IMAGE_GIF, - CT_IMAGE_XICON, - CT_IMAGE_ICNS, CT_IMAGE_BMP, + CT_IMAGE_GIF, + CT_IMAGE_ICNS, + CT_IMAGE_JPG, + CT_IMAGE_JP2, + CT_IMAGE_PNG, + CT_IMAGE_SVG_XML, + CT_IMAGE_TIFF, + CT_IMAGE_WEBP, + CT_IMAGE_X_MNG, + CT_IMAGE_X_TGA, + CT_IMAGE_XICON, + CT_VIDEO_APG, + CT_VIDEO_AV1, + CT_VIDEO_QUICKTIME, + CT_VIDEO_MATROSKA, + CT_VIDEO_MP4, + CT_VIDEO_MPEG, + CT_VIDEO_MPEG2TS, + CT_VIDEO_OGG, + CT_VIDEO_WEBM, + CT_VIDEO_X_M4V, + CT_VIDEO_X_MSVIDEO, CT_MULTIPART_FORM_DATA, CT_CUSTOM }; diff --git a/lib/src/HttpFileImpl.h b/lib/src/HttpFileImpl.h index cd05c9fd..f0c7a2d6 100644 --- a/lib/src/HttpFileImpl.h +++ b/lib/src/HttpFileImpl.h @@ -117,6 +117,9 @@ class HttpFileImpl /// Return the type of file. FileType getFileType() const noexcept { + auto ft = drogon::getFileType(contentType_); + if ((ft != FT_UNKNOWN) && (ft != FT_CUSTOM)) + return ft; return parseFileType(getFileExtension()); } diff --git a/lib/src/HttpUtils.cc b/lib/src/HttpUtils.cc index 4fad29b7..5cfcbb7d 100644 --- a/lib/src/HttpUtils.cc +++ b/lib/src/HttpUtils.cc @@ -15,12 +15,219 @@ #include "HttpUtils.h" #include #include +#include #include +#include namespace drogon { static std::unordered_map customMime; +// https://en.wikipedia.org/wiki/List_of_file_formats +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types +// https://www.digipres.org/formats/mime-types/ +// https://www.iana.org/assignments/media-types/media-types.xhtml +// content type -> list of corresponding mime types, the first being the default +// (more standard) one + the mime type to return in contentTypeToMime() when not +// empty (mainly to return the charset for text types) +static const std::unordered_map< + ContentType, + std::pair, std::string_view>> + mimeTypeDatabase_{ + {CT_NONE, {{""}, ""}}, + {CT_APPLICATION_OCTET_STREAM, {{"application/octet-stream"}, ""}}, + {CT_APPLICATION_X_FORM, {{"application/x-www-form-urlencoded"}, ""}}, + {CT_MULTIPART_FORM_DATA, {{"multipart/form-data"}, ""}}, + {CT_APPLICATION_GZIP, {{"application/gzip"}, ""}}, + {CT_APPLICATION_JSON, + {{"application/json"}, "application/json; charset=utf-8"}}, + {CT_APPLICATION_FONT_WOFF, {{"application/font-woff"}, ""}}, + {CT_APPLICATION_FONT_WOFF2, {{"application/font-woff2"}, ""}}, + {CT_APPLICATION_JAVA_ARCHIVE, + {{"application/java-archive", "application/x-java-archive"}, ""}}, + {CT_APPLICATION_MSWORD, {{"application/msword"}, ""}}, + {CT_APPLICATION_MSWORDX, + {{"application/" + "vnd.openxmlformats-officedocument.wordprocessingml.document"}, + ""}}, + {CT_APPLICATION_PDF, {{"application/pdf"}, ""}}, + {CT_APPLICATION_VND_MS_FONTOBJ, + {{"application/vnd.ms-fontobject"}, ""}}, + {CT_APPLICATION_VND_RAR, {{"application/vnd.rar"}, ""}}, + {CT_APPLICATION_WASM, {{"application/wasm"}, ""}}, + {CT_APPLICATION_X_BZIP, {{"application/x-bzip"}, ""}}, + {CT_APPLICATION_X_BZIP2, {{"application/x-bzip2"}, ""}}, + {CT_APPLICATION_X_7Z, {{"application/x-7z-compressed"}, ""}}, + {CT_APPLICATION_X_HTTPD_PHP, {{"application/x-httpd-php"}, ""}}, + {CT_APPLICATION_X_JAVASCRIPT, + {{"application/x-javascript"}, + "application/x-javascript; charset=utf-8"}}, + {CT_APPLICATION_X_FONT_OPENTYPE, + {{"application/x-font-opentype", "font/otf"}, ""}}, + {CT_APPLICATION_X_FONT_TRUETYPE, + {{"application/x-font-truetype", "font/ttf"}, ""}}, + {CT_APPLICATION_X_TAR, {{"application/x-tar"}, ""}}, + {CT_APPLICATION_X_TGZ, {{"application/x-tgz"}, ""}}, + {CT_APPLICATION_X_XZ, {{"application/x-xz", "application/x-lzma"}, ""}}, + {CT_APPLICATION_XHTML, + {{"application/xhtml+xml", "application/xhtml"}, + "application/xhtml+xml; charset=utf-8"}}, + {CT_APPLICATION_XML, + {{"application/xml"}, "application/xml; charset=utf-8"}}, + {CT_APPLICATION_ZIP, {{"application/zip"}, ""}}, + {CT_AUDIO_AAC, {{"audio/aac", "audio/aacp"}, ""}}, + {CT_AUDIO_AC3, {{"audio/ac3"}, ""}}, + {CT_AUDIO_AIFF, {{"audio/aiff", "audio/x-aiff"}, ""}}, + {CT_AUDIO_FLAC, {{"audio/flac"}, ""}}, + {CT_AUDIO_MATROSKA, {{"audio/matroska", "audio/x-matroska"}, ""}}, + {CT_AUDIO_MPEG, {{"audio/mpeg"}, ""}}, + {CT_AUDIO_MPEG4, {{"audio/mp4", "audio/x-m4a"}, ""}}, + {CT_AUDIO_OGG, {{"audio/ogg"}, ""}}, + {CT_AUDIO_WAVE, {{"audio/wav", "audio/x-wav"}, ""}}, + {CT_AUDIO_X_APE, {{"audio/x-ape"}, ""}}, + {CT_AUDIO_X_MS_WMA, {{"audio/x-ms-wma"}, ""}}, + {CT_AUDIO_X_TTA, {{"audio/x-tta"}, ""}}, + {CT_AUDIO_X_WAVPACK, {{"audio/x-wavpack"}, ""}}, + {CT_AUDIO_WEBM, {{"audio/webm"}, ""}}, + {CT_IMAGE_APNG, {{"image/apng"}, ""}}, + {CT_IMAGE_AVIF, {{"image/avif"}, ""}}, + {CT_IMAGE_BMP, {{"image/bmp"}, ""}}, + {CT_IMAGE_GIF, {{"image/gif"}, ""}}, + {CT_IMAGE_ICNS, {{"image/icns"}, ""}}, + {CT_IMAGE_JP2, {{"image/jp2", "image/jpx", "image/jpm"}, ""}}, + {CT_IMAGE_JPG, {{"image/jpeg"}, ""}}, + {CT_IMAGE_PNG, {{"image/png"}, ""}}, + {CT_IMAGE_SVG_XML, {{"image/svg+xml"}, ""}}, + {CT_IMAGE_TIFF, {{"image/tiff"}, ""}}, + {CT_IMAGE_WEBP, {{"image/webp"}, ""}}, + {CT_IMAGE_X_MNG, {{"image/x-mng"}, ""}}, + {CT_IMAGE_X_TGA, {{"image/x-tga", "image/x-targa"}, ""}}, + {CT_IMAGE_XICON, {{"image/vnd.microsoft.icon", "image/x-icon"}, ""}}, + {CT_TEXT_CSS, {{"text/css"}, "text/css; charset=utf-8"}}, + {CT_TEXT_CSV, {{"text/csv"}, "text/csv; charset=utf-8"}}, + {CT_TEXT_HTML, {{"text/html"}, "text/html; charset=utf-8"}}, + {CT_TEXT_JAVASCRIPT, + {{"text/javascript"}, "text/javascript; charset=utf-8"}}, + {CT_TEXT_PLAIN, {{"text/plain"}, "text/plain; charset=utf-8"}}, + {CT_TEXT_XML, {{"text/xml"}, "text/xml; charset=utf-8"}}, + {CT_TEXT_XSL, {{"text/xsl"}, "text/xsl; charset=utf-8"}}, + {CT_VIDEO_APG, {{"video/apg"}, ""}}, + {CT_VIDEO_AV1, {{"video/av01", "video/av1"}, ""}}, + {CT_VIDEO_QUICKTIME, {{"video/quicktime"}, ""}}, + {CT_VIDEO_MPEG, {{"video/mpeg"}, ""}}, + {CT_VIDEO_MPEG2TS, {{"video/mp2t"}, ""}}, + {CT_VIDEO_MP4, {{"video/mp4"}, ""}}, + {CT_VIDEO_OGG, {{"video/ogg"}, ""}}, + {CT_VIDEO_WEBM, {{"video/webm"}, ""}}, + {CT_VIDEO_X_M4V, {{"video/x-m4v"}, ""}}, + {CT_VIDEO_MATROSKA, {{"video/matroska", "video/x-matroska"}, ""}}, + {CT_VIDEO_X_MSVIDEO, {{"video/x-msvideo"}, ""}}, + }; + +static const std::unordered_map> + fileTypeDatabase_{ + {"", {FT_UNKNOWN, CT_CUSTOM}}, + {"aac", {FT_AUDIO, CT_AUDIO_AAC}}, + {"ac3", {FT_AUDIO, CT_AUDIO_AC3}}, + {"aif", {FT_AUDIO, CT_AUDIO_AIFF}}, + {"aifc", {FT_AUDIO, CT_AUDIO_AIFF}}, + {"aiff", {FT_AUDIO, CT_AUDIO_AIFF}}, + {"apg", {FT_AUDIO, CT_VIDEO_APG}}, + {"ape", {FT_AUDIO, CT_AUDIO_X_APE}}, + {"apng", {FT_IMAGE, CT_IMAGE_APNG}}, + {"av1", {FT_MEDIA, CT_VIDEO_AV1}}, + {"avi", {FT_MEDIA, CT_VIDEO_X_MSVIDEO}}, + {"avif", {FT_IMAGE, CT_IMAGE_AVIF}}, + {"bmp", {FT_IMAGE, CT_IMAGE_BMP}}, + {"bz", {FT_ARCHIVE, CT_APPLICATION_X_BZIP}}, + {"bz2", {FT_ARCHIVE, CT_APPLICATION_X_BZIP2}}, + {"css", {FT_DOCUMENT, CT_TEXT_CSS}}, + {"csv", {FT_DOCUMENT, CT_TEXT_CSV}}, + {"doc", {FT_DOCUMENT, CT_APPLICATION_MSWORD}}, + {"docx", {FT_DOCUMENT, CT_APPLICATION_MSWORDX}}, + {"eot", {FT_DOCUMENT, CT_APPLICATION_VND_MS_FONTOBJ}}, + {"flac", {FT_AUDIO, CT_AUDIO_FLAC}}, + {"gif", {FT_MEDIA, CT_IMAGE_GIF}}, + {"gz", {FT_ARCHIVE, CT_APPLICATION_GZIP}}, + {"htm", {FT_DOCUMENT, CT_TEXT_HTML}}, + {"html", {FT_DOCUMENT, CT_TEXT_HTML}}, + {"icns", {FT_IMAGE, CT_IMAGE_ICNS}}, + {"ico", {FT_IMAGE, CT_IMAGE_XICON}}, + {"j2k", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jar", {FT_DOCUMENT, CT_APPLICATION_JAVA_ARCHIVE}}, + {"j2c", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jp2", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jpeg", {FT_IMAGE, CT_IMAGE_JPG}}, + {"jpc", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jpf", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jpg", {FT_IMAGE, CT_IMAGE_JPG}}, + {"jpg2", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jpm", {FT_IMAGE, CT_IMAGE_JP2}}, + {"jpx", {FT_IMAGE, CT_IMAGE_JP2}}, + {"js", {FT_DOCUMENT, CT_TEXT_JAVASCRIPT}}, + {"json", {FT_DOCUMENT, CT_APPLICATION_JSON}}, + {"lzma", {FT_ARCHIVE, CT_APPLICATION_X_XZ}}, + {"m1a", {FT_AUDIO, CT_AUDIO_MPEG}}, + {"m1v", {FT_MEDIA, CT_VIDEO_MPEG}}, + {"m2a", {FT_AUDIO, CT_AUDIO_MPEG}}, + {"m2ts", {FT_MEDIA, CT_VIDEO_MPEG2TS}}, + {"m2v", {FT_MEDIA, CT_VIDEO_MPEG}}, + {"m4a", {FT_AUDIO, CT_AUDIO_MPEG4}}, + {"m4v", {FT_MEDIA, CT_VIDEO_X_M4V}}, + {"mjs", {FT_DOCUMENT, CT_TEXT_JAVASCRIPT}}, + {"mka", {FT_AUDIO, CT_AUDIO_MATROSKA}}, + {"mkv", {FT_MEDIA, CT_VIDEO_MATROSKA}}, + {"mng", {FT_MEDIA, CT_IMAGE_X_MNG}}, + {"mov", {FT_MEDIA, CT_VIDEO_QUICKTIME}}, + {"mp1", {FT_AUDIO, CT_AUDIO_MPEG}}, + {"mp2", {FT_AUDIO, CT_AUDIO_MPEG}}, + {"mp3", {FT_AUDIO, CT_AUDIO_MPEG}}, + {"mp4", {FT_MEDIA, CT_VIDEO_MP4}}, + {"mpa", {FT_AUDIO, CT_AUDIO_MPEG}}, + {"mpe", {FT_MEDIA, CT_VIDEO_MPEG}}, + {"mpeg", {FT_MEDIA, CT_VIDEO_MPEG}}, + {"mpg", {FT_MEDIA, CT_VIDEO_MPEG}}, + {"mpv", {FT_MEDIA, CT_VIDEO_MPEG}}, + {"oga", {FT_AUDIO, CT_AUDIO_OGG}}, + {"ogg", {FT_AUDIO, CT_AUDIO_OGG}}, + {"ogv", {FT_MEDIA, CT_VIDEO_OGG}}, + {"otf", {FT_DOCUMENT, CT_APPLICATION_X_FONT_OPENTYPE}}, + {"pdf", {FT_DOCUMENT, CT_APPLICATION_PDF}}, + {"php", {FT_DOCUMENT, CT_APPLICATION_X_HTTPD_PHP}}, + {"png", {FT_IMAGE, CT_IMAGE_PNG}}, + {"rar", {FT_ARCHIVE, CT_APPLICATION_VND_RAR}}, + {"svg", {FT_IMAGE, CT_IMAGE_SVG_XML}}, + {"tar", {FT_ARCHIVE, CT_APPLICATION_X_TAR}}, + {"targa", {FT_IMAGE, CT_IMAGE_X_TGA}}, + {"tif", {FT_IMAGE, CT_IMAGE_TIFF}}, + {"tiff", {FT_IMAGE, CT_IMAGE_TIFF}}, + {"tga", {FT_IMAGE, CT_IMAGE_X_TGA}}, + {"tgz", {FT_ARCHIVE, CT_APPLICATION_X_TGZ}}, + {"ts", {FT_MEDIA, CT_VIDEO_MPEG2TS}}, + {"tta", {FT_AUDIO, CT_AUDIO_X_TTA}}, + {"ttf", {FT_DOCUMENT, CT_APPLICATION_X_FONT_TRUETYPE}}, + {"txt", {FT_DOCUMENT, CT_TEXT_PLAIN}}, + {"w64", {FT_AUDIO, CT_AUDIO_WAVE}}, + {"wav", {FT_AUDIO, CT_AUDIO_WAVE}}, + {"wave", {FT_AUDIO, CT_AUDIO_WAVE}}, + {"wasm", {FT_DOCUMENT, CT_APPLICATION_WASM}}, + {"weba", {FT_AUDIO, CT_AUDIO_WEBM}}, + {"webm", {FT_MEDIA, CT_VIDEO_WEBM}}, + {"webp", {FT_IMAGE, CT_IMAGE_WEBP}}, + {"wma", {FT_AUDIO, CT_AUDIO_X_MS_WMA}}, + {"woff", {FT_DOCUMENT, CT_APPLICATION_FONT_WOFF}}, + {"woff2", {FT_DOCUMENT, CT_APPLICATION_FONT_WOFF2}}, + {"wv", {FT_AUDIO, CT_AUDIO_X_WAVPACK}}, + {"xht", {FT_DOCUMENT, CT_APPLICATION_XHTML}}, + {"xhtml", {FT_DOCUMENT, CT_APPLICATION_XHTML}}, + {"xml", {FT_DOCUMENT, CT_APPLICATION_XML}}, + {"xsl", {FT_DOCUMENT, CT_TEXT_XSL}}, + {"xz", {FT_ARCHIVE, CT_APPLICATION_X_XZ}}, + {"zip", {FT_ARCHIVE, CT_APPLICATION_ZIP}}, + {"7z", {FT_ARCHIVE, CT_APPLICATION_X_7Z}}, + }; + const std::string_view &statusCodeToString(int code) { switch (code) @@ -386,305 +593,69 @@ ContentType getContentType(const std::string &fileName) extName.begin(), [](unsigned char c) { return tolower(c); }); } - switch (extName.length()) - { - case 0: - return CT_APPLICATION_OCTET_STREAM; - case 2: - { - if (extName == "js") - return CT_TEXT_JAVASCRIPT; - return CT_APPLICATION_OCTET_STREAM; - } - case 3: - { - switch (extName[0]) - { - case 'b': - if (extName == "bmp") - return CT_IMAGE_BMP; - break; - case 'c': - if (extName == "css") - return CT_TEXT_CSS; - break; - case 'e': - if (extName == "eot") - return CT_APPLICATION_VND_MS_FONTOBJ; - break; - case 'g': - if (extName == "gif") - return CT_IMAGE_GIF; - break; - case 'i': - if (extName == "ico") - return CT_IMAGE_XICON; - break; - case 'j': - if (extName == "jpg") - return CT_IMAGE_JPG; - break; - case 'o': - if (extName == "otf") - return CT_APPLICATION_X_FONT_OPENTYPE; - break; - case 'p': - if (extName == "png") - return CT_IMAGE_PNG; - else if (extName == "pdf") - return CT_APPLICATION_PDF; - break; - case 's': - if (extName == "svg") - return CT_IMAGE_SVG_XML; - break; - case 't': - if (extName == "txt") - return CT_TEXT_PLAIN; - else if (extName == "ttf") - return CT_APPLICATION_X_FONT_TRUETYPE; - break; - case 'x': - if (extName == "xml") - return CT_TEXT_XML; - else if (extName == "xsl") - return CT_TEXT_XSL; - break; - default: - break; - } - return CT_APPLICATION_OCTET_STREAM; - } - case 4: - { - if (extName == "avif") - return CT_IMAGE_AVIF; - else if (extName == "html") - return CT_TEXT_HTML; - else if (extName == "jpeg") - return CT_IMAGE_JPG; - else if (extName == "icns") - return CT_IMAGE_ICNS; - else if (extName == "webp") - return CT_IMAGE_WEBP; - else if (extName == "wasm") - return CT_APPLICATION_WASM; - else if (extName == "woff") - return CT_APPLICATION_FONT_WOFF; - return CT_APPLICATION_OCTET_STREAM; - } - case 5: - { - if (extName == "woff2") - return CT_APPLICATION_FONT_WOFF2; - return CT_APPLICATION_OCTET_STREAM; - } - default: - return CT_APPLICATION_OCTET_STREAM; - } + auto it = fileTypeDatabase_.find(extName); + return (it == fileTypeDatabase_.end()) ? CT_APPLICATION_OCTET_STREAM + : it->second.second; } ContentType parseContentType(const std::string_view &contentType) { - static const std::unordered_map map_{ - {"text/html", CT_TEXT_HTML}, - {"application/x-www-form-urlencoded", CT_APPLICATION_X_FORM}, - {"application/xml", CT_APPLICATION_XML}, - {"application/json", CT_APPLICATION_JSON}, - {"application/x-javascript", CT_APPLICATION_X_JAVASCRIPT}, - {"text/javascript", CT_TEXT_JAVASCRIPT}, - {"text/css", CT_TEXT_CSS}, - {"text/xml", CT_TEXT_XML}, - {"text/xsl", CT_TEXT_XSL}, - {"application/octet-stream", CT_APPLICATION_OCTET_STREAM}, - {"image/svg+xml", CT_IMAGE_SVG_XML}, - {"application/x-font-truetype", CT_APPLICATION_X_FONT_TRUETYPE}, - {"application/x-font-opentype", CT_APPLICATION_X_FONT_OPENTYPE}, - {"application/font-woff", CT_APPLICATION_FONT_WOFF}, - {"application/font-woff2", CT_APPLICATION_FONT_WOFF2}, - {"application/vnd.ms-fontobject", CT_APPLICATION_VND_MS_FONTOBJ}, - {"application/pdf", CT_APPLICATION_PDF}, - {"image/png", CT_IMAGE_PNG}, - {"image/webp", CT_IMAGE_WEBP}, - {"image/avif", CT_IMAGE_AVIF}, - {"image/jpeg", CT_IMAGE_JPG}, - {"image/gif", CT_IMAGE_GIF}, - {"image/x-icon", CT_IMAGE_XICON}, - {"image/bmp", CT_IMAGE_BMP}, - {"image/icns", CT_IMAGE_ICNS}, - {"application/wasm", CT_APPLICATION_WASM}, - {"text/plain", CT_TEXT_PLAIN}, - {"multipart/form-data", CT_MULTIPART_FORM_DATA}}; - auto iter = map_.find(contentType); - if (iter == map_.end()) - return CT_NONE; - return iter->second; + // Generate map from database for faster query + static std::unordered_map contentTypeMap_; + // Thread safe initialization + static std::once_flag flag; + std::call_once(flag, []() { + for (const auto &e : mimeTypeDatabase_) + { + for (const auto &type : e.second.first) + contentTypeMap_[type] = e.first; + } + }); + auto ext = contentType.find(';'); + if (ext != std::string_view::npos) + return parseContentType(contentType.substr(0, ext)); + if (contentType == "application/x-www-form-urlencoded") + return CT_APPLICATION_X_FORM; + if (contentType == "multipart/form-data") + return CT_MULTIPART_FORM_DATA; + auto it = contentTypeMap_.find(contentType); + return (it == contentTypeMap_.end()) ? CT_CUSTOM : it->second; } FileType parseFileType(const std::string_view &fileExtension) { - // https://en.wikipedia.org/wiki/List_of_file_formats - static const std::unordered_map map_{ - {"", FT_UNKNOWN}, {"html", FT_DOCUMENT}, {"docx", FT_DOCUMENT}, - {"zip", FT_ARCHIVE}, {"rar", FT_ARCHIVE}, {"xz", FT_ARCHIVE}, - {"7z", FT_ARCHIVE}, {"tgz", FT_ARCHIVE}, {"gz", FT_ARCHIVE}, - {"bz2", FT_ARCHIVE}, {"mp3", FT_AUDIO}, {"wav", FT_AUDIO}, - {"avi", FT_MEDIA}, {"gif", FT_MEDIA}, {"mp4", FT_MEDIA}, - {"mov", FT_MEDIA}, {"png", FT_IMAGE}, {"jpeg", FT_IMAGE}, - {"jpg", FT_IMAGE}, {"ico", FT_IMAGE}}; - auto iter = map_.find(fileExtension); - if (iter == map_.end()) - return FT_CUSTOM; - return iter->second; + std::string extName(fileExtension); + transform(extName.begin(), + extName.end(), + extName.begin(), + [](unsigned char c) { return tolower(c); }); + auto it = fileTypeDatabase_.find(extName); + return (it == fileTypeDatabase_.end()) ? FT_CUSTOM : it->second.first; } -const std::string_view &contentTypeToMime(ContentType contenttype) +FileType getFileType(ContentType contentType) { - switch (contenttype) - { - case CT_TEXT_HTML: - { - static std::string_view sv = "text/html; charset=utf-8"; - return sv; - } - case CT_APPLICATION_X_FORM: - { - static std::string_view sv = "application/x-www-form-urlencoded"; - return sv; - } - case CT_APPLICATION_XML: - { - static std::string_view sv = "application/xml; charset=utf-8"; - return sv; - } - case CT_APPLICATION_JSON: - { - static std::string_view sv = "application/json; charset=utf-8"; - return sv; - } - case CT_APPLICATION_X_JAVASCRIPT: - { - static std::string_view sv = - "application/x-javascript; charset=utf-8"; - return sv; - } - case CT_TEXT_JAVASCRIPT: - { - static std::string_view sv = "text/javascript; charset=utf-8"; - return sv; - } - case CT_TEXT_CSS: - { - static std::string_view sv = "text/css; charset=utf-8"; - return sv; - } - case CT_TEXT_XML: - { - static std::string_view sv = "text/xml; charset=utf-8"; - return sv; - } - case CT_TEXT_XSL: - { - static std::string_view sv = "text/xsl; charset=utf-8"; - return sv; - } - case CT_APPLICATION_OCTET_STREAM: - { - static std::string_view sv = "application/octet-stream"; - return sv; - } - case CT_IMAGE_SVG_XML: - { - static std::string_view sv = "image/svg+xml"; - return sv; - } - case CT_APPLICATION_X_FONT_TRUETYPE: - { - static std::string_view sv = "application/x-font-truetype"; - return sv; - } - case CT_APPLICATION_X_FONT_OPENTYPE: - { - static std::string_view sv = "application/x-font-opentype"; - return sv; - } - case CT_APPLICATION_FONT_WOFF: - { - static std::string_view sv = "application/font-woff"; - return sv; - } - case CT_APPLICATION_FONT_WOFF2: - { - static std::string_view sv = "application/font-woff2"; - return sv; - } - case CT_APPLICATION_VND_MS_FONTOBJ: - { - static std::string_view sv = "application/vnd.ms-fontobject"; - return sv; - } - case CT_APPLICATION_PDF: - { - static std::string_view sv = "application/pdf"; - return sv; - } - case CT_IMAGE_PNG: - { - static std::string_view sv = "image/png"; - return sv; - } - case CT_IMAGE_AVIF: - { - static std::string_view sv = "image/avif"; - return sv; - } - case CT_IMAGE_WEBP: - { - static std::string_view sv = "image/webp"; - return sv; - } - case CT_IMAGE_JPG: - { - static std::string_view sv = "image/jpeg"; - return sv; - } - case CT_IMAGE_GIF: - { - static std::string_view sv = "image/gif"; - return sv; - } - case CT_IMAGE_XICON: - { - static std::string_view sv = "image/x-icon"; - return sv; - } - case CT_IMAGE_BMP: - { - static std::string_view sv = "image/bmp"; - return sv; - } - case CT_IMAGE_ICNS: - { - static std::string_view sv = "image/icns"; - return sv; - } - case CT_APPLICATION_WASM: - { - static std::string_view sv = "application/wasm"; - return sv; - } - case CT_NONE: - { - static std::string_view sv = ""; - return sv; - } - default: - case CT_TEXT_PLAIN: - { - static std::string_view sv = "text/plain; charset=utf-8"; - return sv; - } - } + // Generate map from database for faster query + static std::unordered_map fileTypeMap_; + // Thread safe initialization + static std::once_flag flag; + std::call_once(flag, []() { + for (const auto &e : fileTypeDatabase_) + fileTypeMap_[e.second.second] = e.second.first; + fileTypeMap_[CT_NONE] = FT_UNKNOWN; + fileTypeMap_[CT_CUSTOM] = FT_CUSTOM; + }); + auto it = fileTypeMap_.find(contentType); + return (it == fileTypeMap_.end()) ? FT_UNKNOWN : it->second; +} + +const std::string_view &contentTypeToMime(ContentType contentType) +{ + auto it = mimeTypeDatabase_.find(contentType); + return (it == mimeTypeDatabase_.end()) + ? mimeTypeDatabase_.at(CT_APPLICATION_OCTET_STREAM).first.front() + : (it->second.second.empty() ? it->second.first.front() + : it->second.second); } void registerCustomExtensionMime(const std::string &ext, @@ -703,9 +674,9 @@ void registerCustomExtensionMime(const std::string &ext, const std::string_view fileNameToMime(const std::string &fileName) { - ContentType intenalContentType = getContentType(fileName); - if (intenalContentType != CT_APPLICATION_OCTET_STREAM) - return contentTypeToMime(intenalContentType); + ContentType internalContentType = getContentType(fileName); + if (internalContentType != CT_APPLICATION_OCTET_STREAM) + return contentTypeToMime(internalContentType); std::string extName; auto pos = fileName.rfind('.'); @@ -726,9 +697,9 @@ const std::string_view fileNameToMime(const std::string &fileName) std::pair fileNameToContentTypeAndMime( const std::string &fileName) { - ContentType intenalContentType = getContentType(fileName); - if (intenalContentType != CT_APPLICATION_OCTET_STREAM) - return {intenalContentType, contentTypeToMime(intenalContentType)}; + ContentType internalContentType = getContentType(fileName); + if (internalContentType != CT_APPLICATION_OCTET_STREAM) + return {internalContentType, contentTypeToMime(internalContentType)}; std::string extName; auto pos = fileName.rfind('.'); @@ -745,4 +716,28 @@ std::pair fileNameToContentTypeAndMime( return {CT_NONE, ""}; return {CT_CUSTOM, it->second}; } + +const std::vector &getFileExtensions(ContentType contentType) +{ + // Generate map from database for faster query + static std::unordered_map> + extensionMap_; + static std::vector notFound_; + // Thread safe initialization + static std::once_flag flag; + std::call_once(flag, []() { + for (const auto &e : fileTypeDatabase_) + if (!e.first.empty()) + extensionMap_[e.second.second].push_back(e.first); + // Add deprecated + extensionMap_[CT_APPLICATION_X_JAVASCRIPT] = + extensionMap_[CT_TEXT_JAVASCRIPT]; + extensionMap_[CT_TEXT_XML] = extensionMap_[CT_APPLICATION_XML]; + }); + auto it = extensionMap_.find(contentType); + if (it == extensionMap_.end()) + return notFound_; + return it->second; +} + } // namespace drogon diff --git a/lib/src/HttpUtils.h b/lib/src/HttpUtils.h index 882823d5..16a6c217 100644 --- a/lib/src/HttpUtils.h +++ b/lib/src/HttpUtils.h @@ -26,6 +26,7 @@ const std::string_view &statusCodeToString(int code); ContentType getContentType(const std::string &fileName); ContentType parseContentType(const std::string_view &contentType); FileType parseFileType(const std::string_view &fileExtension); +FileType getFileType(ContentType contentType); void registerCustomExtensionMime(const std::string &ext, const std::string &mime); const std::string_view fileNameToMime(const std::string &fileName); @@ -40,6 +41,14 @@ inline std::string_view getFileExtension(const std::string &fileName) return std::string_view(&fileName[pos + 1], fileName.length() - pos - 1); } +const std::vector &getFileExtensions(ContentType contentType); + +inline const std::vector &getFileExtensions( + const std::string_view &contentType) +{ + return getFileExtensions(parseContentType(contentType)); +} + template inline constexpr const char *contentLengthFormatString() { diff --git a/lib/tests/unittests/FileTypeTest.cc b/lib/tests/unittests/FileTypeTest.cc index 259c1156..de044f7c 100644 --- a/lib/tests/unittests/FileTypeTest.cc +++ b/lib/tests/unittests/FileTypeTest.cc @@ -24,6 +24,65 @@ DROGON_TEST(ExtensionTest) } } +DROGON_TEST(ContentTypeTest) +{ + SUBSECTION(normal) + { + for (int i = CT_NONE + 1; i < CT_CUSTOM; i++) + { + auto contentType = ContentType(i); + const auto mimeType = contentTypeToMime(contentType); + CHECK(mimeType.empty() == false); + CHECK(parseContentType(mimeType) == contentType); + auto exts = getFileExtensions(contentType); + bool shouldBeEmpty = (contentType == CT_APPLICATION_X_FORM) || + (contentType == CT_APPLICATION_OCTET_STREAM) || + (contentType == CT_MULTIPART_FORM_DATA); + CHECK(exts.empty() == shouldBeEmpty); + for (const auto &ext : exts) + { + auto dummyFile{std::string("dummy.").append(ext)}; + // Handle multiple mime types by setting the default ContentType + if (contentType == CT_APPLICATION_X_JAVASCRIPT) + contentType = CT_TEXT_JAVASCRIPT; + if (contentType == CT_TEXT_XML) + contentType = CT_APPLICATION_XML; + CHECK(getContentType(dummyFile) == contentType); + CHECK(parseFileType(dummyFile) != FT_UNKNOWN); + } + if (!shouldBeEmpty) + { + CHECK(getFileType(contentType) != FT_UNKNOWN); + CHECK(getFileType(contentType) != FT_CUSTOM); + } + } + } + + SUBSECTION(negative) + { + CHECK(getFileType(CT_NONE) == FT_UNKNOWN); + CHECK(getFileType(CT_CUSTOM) == FT_CUSTOM); + CHECK(getFileType(CT_APPLICATION_X_FORM) == FT_UNKNOWN); + CHECK(getFileType(CT_APPLICATION_OCTET_STREAM) == FT_UNKNOWN); + CHECK(getFileType(CT_MULTIPART_FORM_DATA) == FT_UNKNOWN); + CHECK(contentTypeToMime(CT_NONE).empty()); + CHECK(contentTypeToMime(CT_CUSTOM) == + contentTypeToMime(CT_APPLICATION_OCTET_STREAM)); + CHECK(parseContentType("") == CT_NONE); + CHECK(parseContentType("application/x-www-form-urlencoded") == + CT_APPLICATION_X_FORM); + CHECK(parseContentType("multipart/form-data") == + CT_MULTIPART_FORM_DATA); + CHECK(parseFileType("any.thing") == FT_CUSTOM); + CHECK(getContentType("any.thing") == CT_APPLICATION_OCTET_STREAM); + CHECK(getFileExtensions(CT_NONE).empty()); + CHECK(getFileExtensions(CT_APPLICATION_X_FORM).empty()); + CHECK(getFileExtensions(CT_APPLICATION_OCTET_STREAM).empty()); + CHECK(getFileExtensions(CT_MULTIPART_FORM_DATA).empty()); + CHECK(getFileExtensions(CT_CUSTOM).empty()); + } +} + DROGON_TEST(FileTypeTest) { SUBSECTION(normal)