unlimited-storage

YouTube filesystem tool for uploading arbitrary data to the service
git clone git://git.laack.co/unlimited-storage.git
Log | Files | Refs | README

chunk.cpp (10213B)


# include "../include/chunk.h"
# include "filesystem"
# include <bitset>
# include <cctype>
# include <cstddef>
# include <cstdint>
# include <fstream>
# include <iostream>
# include <stdexcept>
# include <string>
# include <utility>
# include <vector>
# include "../include/debug.h"
      7 
      8 
      9 const int IMAGE_HEADER_SIZE = 48;
     10 
     11 // read in saved chunk from file
     12 Chunk::Chunk(std::string filename) {
     13 
     14     std::ifstream file;
     15 
     16     file.open(filename);
     17 
     18     std::string fileString = "";
     19 
     20     char byte;
     21 
     22     while (file.read(&byte, 1)) {
     23         fileString += byte;
     24     }
     25 
     26     file.close();
     27 
     28     // header (.pbm header), comment (.pbm comment), dims (.pbm spec)
     29 
     30     std::string header = "";
     31     std::string comment = "";
     32     std::string dims  = "";
     33 
     34     uint itr = 0;
     35     while (fileString[itr] != '\n') {
     36         header += fileString[itr];
     37         itr += 1;
     38     }
     39 
     40     itr += 1;
     41 
     42     while (fileString[itr] != '\n') {
     43         comment += fileString[itr];
     44         itr += 1;
     45     }
     46 
     47 
     48     itr += 1;
     49 
     50     while (fileString[itr] != '\n') {
     51         dims += fileString[itr];
     52         itr += 1;
     53     }
     54 
     55     itr += 1;
     56 
     57 
     58     std::string content = "";
     59 
     60     // this would be better if we didn't preallocate a size for this section
     61     // but that requires quite a bit more work for what I think is minimal benefit.
     62 
     63     // ENCODED HEADER (this data is encoded in the image itself):
     64     // chunk number  4 bytes (16 bits)
     65     // characters    4 bytes (16 bits)
     66     // fnamelen      4 bytes (16 bits)
     67     // filename      fnamelen 
     68     //
     69     // 16 + 16 + 16 = 48
     70 
     71 
     72 
     73     this->header.chunkNumber = 0;
     74     this->header.characters = 0;
     75     this->header.fnameLength = 0;
     76     this->header.filename = "";
     77 
     78     std::string encodedCNum = extractEncodedSection(fileString, itr, 4 * 8);
     79     std::string encodedCharacters = extractEncodedSection(fileString, itr, 4 * 8);
     80     std::string encodedFnameLength = extractEncodedSection(fileString, itr, 4 * 8);
     81 
     82     this->header.characters = std::bitset<4*8>(encodedCharacters).to_ullong();
     83     this->header.chunkNumber = std::bitset<4*8>(encodedCNum).to_ullong();
     84     this->header.fnameLength = std::bitset<4*8>(encodedFnameLength).to_ullong();
     85 
     86     std::string encodedFname = extractEncodedSection(fileString, itr, this->header.fnameLength);
     87     this->header.filename = binaryToAscii(encodedFname);
     88 
     89     LOG("ENCODED CNUM: " << encodedCNum)
     90     LOG("CNUM: " << this->header.chunkNumber)
     91 
     92     LOG("ENCODED CHARACTERS: " << encodedCharacters)
     93     LOG("CHARACTERS: " << this->header.characters)
     94 
     95     LOG("ENCODED FNAMELENGTH: " << encodedFnameLength)
     96     LOG("FNAMELENGTH: " << this->header.fnameLength)
     97 
     98 
     99     LOG("ENCODED FNAME: " << encodedFname)
    100     LOG("FNAME: " << this->header.filename)
    101 
    102     while(itr < fileString.size()) {
    103         if (fileString[itr] != ' ' && fileString[itr] != '\n') {
    104             content += fileString[itr];
    105         }
    106         itr += 1;
    107     }
    108 
    109     std::vector<char> bytes;
    110 
    111 
    112     for (uint i = 0 ; i < this->header.characters; ++i) {
    113 
    114         std::string current = "";
    115 
    116         for (int itr = 0; itr < 8; ++itr) {
    117 
    118             current += content[i * 8 + itr];
    119         }
    120 
    121         std::bitset<8> bits(current);
    122 
    123         char recovered_char = static_cast<char>(bits.to_ulong());
    124 
    125         bytes.push_back(recovered_char);
    126     }
    127 
    128     this->chunk = bytes;
    129 }
    130 
    131 Chunk::Chunk(std::vector<char> chunkData) {
    132     this->chunk = chunkData;
    133 }
    134 
    135 Chunk::Chunk(std::string filename, uint start, uint x, uint y) {
    136 
    137     std::ifstream file;
    138 
    139     file.open(filename, std::ios::binary);
    140     LOG("SEEKING: " + std::to_string(start))
    141     file.seekg(start);
    142 
    143     std::string filenameEncoded = "";
    144 
    145     for (std::size_t i = 0; i < filename.size(); ++i) {
    146         filenameEncoded += std::bitset<8>(filename.c_str()[i]).to_string();
    147     }   
    148 
    149     uint bytesToLoad = ((x * y) / 8) - (IMAGE_HEADER_SIZE + filenameEncoded.size());
    150     uint fSize = std::filesystem::file_size(filename) - start;
    151 
    152     if (fSize < bytesToLoad){
    153         bytesToLoad = fSize;
    154     }
    155 
    156     LOG("FILE SIZE: " + std::to_string(fSize))
    157     LOG("LOADING IN " + std::to_string(bytesToLoad) + " BYTES")
    158 
    159     std::vector<char> bytes(bytesToLoad);
    160 
    161     LOG("LOADED IN " + std::to_string(bytes.size()) + " BYTES")
    162 
    163     file.read(bytes.data(), bytesToLoad);
    164 
    165     file.close();
    166     this->chunk = bytes;
    167 }
    168 
    169 
    170 bool Chunk::operator<(const Chunk& other) const {
    171 
    172     if (this->header.filename == other.header.filename){
    173         return this->header.chunkNumber < other.header.chunkNumber;
    174     }
    175 
    176     return this->header.filename < other.header.filename;
    177 }
    178 
    179 
    180 int Chunk::getChunkNumber() const{
    181     return this->header.chunkNumber;
    182 }
    183 
    184 std::string Chunk::getFilename() const{
    185     return this->header.filename;
    186 }
    187 
    188 std::vector<char> Chunk::getChunk() {
    189     return this->chunk;
    190 }
    191 
    192 // use if header info is populated in the object
    193 // and you are trying to write back out what you read in.
    194 // append if not the first chunk.
    195 
    196 void Chunk::writeChunk() {
    197 
    198     LOG(this->header.filename)
    199 
    200     if(this->header.chunkNumber == 0){
    201         bool deleteExisting = std::filesystem::remove(this->header.filename);
    202         if(deleteExisting){
    203             LOG("FILE DELETED")
    204         }
    205         else{
    206             LOG("NO FILE TO DELETE, CONTINUING")
    207         }
    208     }
    209 
    210     std::ofstream outFile;
    211     std::vector<char> bytes = this->chunk;
    212 
    213     if(this->header.chunkNumber != 0) {
    214         outFile.open(this->header.filename, std::ios::binary | std::ios::app);
    215         LOG("FILE APPENDING")
    216     } else {
    217         outFile.open(this->header.filename, std::ios::binary | std::ios::out);
    218     }
    219 
    220     for(uint i = 0 ; i < bytes.size(); ++i) {
    221         outFile.write(&bytes[i], 1);
    222     }
    223     LOG("WROTE " + std::to_string(bytes.size()) + " CHARS")
    224     LOG("HEADER STATES " + std::to_string(this->header.characters) + " CHARS")
    225 }
    226 
    227 void Chunk::writeChunk(std::string filename) {
    228 
    229     LOG("Writing chunk")
    230     LOG(filename)
    231     std::ofstream outFile;
    232     std::vector<char> bytes = this->chunk;
    233 
    234 
    235     if(this->header.chunkNumber != 0) {
    236         outFile.open(filename, std::ios::binary | std::ios::app);
    237     } else {
    238         outFile.open(filename, std::ios::binary | std::ios::out);
    239     }
    240 
    241     for(uint i = 0 ; i < bytes.size(); ++i) {
    242         outFile.write(&bytes[i], 1);
    243     }
    244 }
    245 
    246 // HEADER:
    247 // - chunk number  4 bytes
    248 // - characters    4 bytes
    249 // - fnamelen      4 bytes
    250 // - filename      fnamelen
    251 
    252 void Chunk::writeImage(std::string filename, uint chunkNumber, std::string originalFilename, uint x, uint y) {
    253 
    254     Chunk chunk = this->chunk;
    255 
    256     std::vector<char> data = chunk.getChunk();
    257 
    258     std::string firstLine = "P1\n";
    259     std::string comment = "# Encoded with Andrew's awesome .pbm encoder.\n";
    260 
    261     std::string chunkNumberEncoded = std::bitset<8*4>(chunkNumber).to_string();
    262     std::string charactersEncoded = std::bitset<8*4>(data.size()).to_string();
    263 
    264     LOG(data.size())
    265 
    266     std::string dims = std::to_string(x);
    267 
    268     std::string filenameEncoded = "";
    269 
    270     // ascii encoding
    271     // todo:
    272     // create function for this.
    273 
    274     for (std::size_t i = 0; i < originalFilename.size(); ++i) {
    275         filenameEncoded += std::bitset<8>(originalFilename.c_str()[i]).to_string();
    276     }
    277 
    278     LOG(filenameEncoded)
    279 
    280     dims += " ";
    281     dims += std::to_string(y);
    282     dims += ("\n");
    283 
    284     std::string filenameLengthEncoded = std::bitset<8*4>(filenameEncoded.size()).to_string();
    285 
    286 
    287     std::string image = firstLine + comment + dims;
    288 
    289 
    290     std::string header = chunkNumberEncoded + charactersEncoded + filenameLengthEncoded + filenameEncoded;
    291 
    292     LOG(chunkNumberEncoded.size() + " - " + chunkNumberEncoded)
    293     LOG(charactersEncoded.size() + " - " + charactersEncoded)
    294     LOG(filenameLengthEncoded.size() + " - " + filenameLengthEncoded)
    295     LOG(filenameEncoded.size() + " - " + filenameEncoded)
    296 
    297     this->header.chunkNumber = chunkNumber;
    298     this->header.filename = filename;
    299     this->header.fnameLength = filenameEncoded.size();
    300     this->header.characters = data.size();
    301 
    302 
    303     uint xPos = 0;
    304 
    305     for (char bit : header) {
    306         if (xPos == x) {
    307             image.append("\n");
    308             xPos = 0;
    309         }
    310         image += bit;
    311         image.append(" ");
    312         xPos += 1;
    313     }
    314 
    315     int target = x * y;
    316     int added = 0;
    317 
    318     for (char chr : data) {
    319         if (added > target){
    320             LOG("BREAKING")
    321             break;
    322         }
    323         std::string current = std::bitset<8>(chr).to_string();
    324         for (int i = 0 ; i < 8; ++i) {
    325             if (xPos == x) {
    326                 image.append("\n");
    327                 xPos = 0;
    328             }
    329             image += current[i];
    330             added += 1;
    331             image.append(" ");
    332 
    333             xPos += 1;
    334         }
    335     }
    336 
    337     LOG("FINISHED WRITING " + std::to_string(added) + " CHARACTER BITS")
    338 
    339     // these are the bits that have been added.
    340     this->written = added / 8;
    341     LOG("ADDED TO IMAGE: " + std::to_string(this->written))
    342     LOG("FINISHED WRITING " + std::to_string(added / 8) + " CHARACTER BYTES")
    343 
    344     if (added<target){
    345         LOG("PADDING WITH ZEROES")
    346     }
    347 
    348     while (added < target) {
    349         if (xPos == x) {
    350             image += "\n";
    351             xPos = 0;
    352         }
    353         image += "0 ";
    354         added += 1;
    355         xPos += 1;
    356     }
    357 
    358     std::ofstream file = std::ofstream(filename);
    359 
    360     file << image;
    361 }
    362 
    363 
    364 std::string Chunk::toString() {
    365 
    366     std::string str = "";
    367 
    368     for(uint i = 0 ; i < this->chunk.size(); ++i) {
    369         str += chunk[i];
    370     }
    371     return str;
    372 }
    373 
    374 void Chunk::print() {
    375     for(uint i = 0 ; i < this->chunk.size(); ++i) {
    376         std::cout << chunk[i];
    377     }
    378 }
    379 
    380 std::string Chunk::extractEncodedSection(const std::string& fileString, uint& itr, int count) {
    381     std::string result;
    382     int collected = 0;
    383 
    384     while (collected < count && itr < fileString.size()) {
    385         char ch = fileString[itr];
    386 
    387         if (!std::isspace(static_cast<unsigned char>(ch))) {
    388             result += ch;
    389             collected += 1;
    390         }
    391         itr += 1;
    392     }
    393 
    394     return result;
    395 }
    396 
    397 std::string Chunk::binaryToAscii(const std::string& binaryStr) {
    398     std::string asciiStr;
    399 
    400     if (binaryStr.size() % 8 != 0) {
    401         throw std::invalid_argument("Length must be multiple of 8");
    402     }
    403 
    404     for (size_t i = 0; i < binaryStr.size(); i += 8) {
    405         std::string byteString = binaryStr.substr(i, 8);
    406         char c = static_cast<char>(std::stoi(byteString, nullptr, 2));
    407         asciiStr += c;
    408     }
    409 
    410     return asciiStr;
    411 }