Varicode packing of trigrams and quadgrams

This commit is contained in:
Jordan Sherer 2018-07-30 21:26:36 -04:00
parent 5238e1ade8
commit ff77effb3e
3 changed files with 183 additions and 126 deletions

View File

@ -5884,7 +5884,7 @@ QStringList MainWindow::buildFT8MessageFrames(QString const& text){
// packDataMessage can output a new line to datLineOut (huff escaping special characters) // packDataMessage can output a new line to datLineOut (huff escaping special characters)
int m = 0; int m = 0;
QString datLineOut; QString datLineOut;
QString datFrame = Varicode::packDataMessage(line.left(21) + "\x04", &datLineOut, &m); // 63 / 3 = 21 (maximum number of 3bit chars we could possibly stuff in here) QString datFrame = Varicode::packDataMessage(line.left(24) + "\x04", &datLineOut, &m); // 66 / 3 + 2 = 24 (maximum number of 3bit chars we could possibly stuff in here plus 2 for good measure :P)
// if this parses to a standard FT8 free text message // if this parses to a standard FT8 free text message
// but it can be parsed as a directed message, then we // but it can be parsed as a directed message, then we

View File

@ -75,71 +75,71 @@ QRegularExpression directed_re("^"
"(?<num>\\s?[-+]?(?:3[01]|[0-2]?[0-9]))?" "(?<num>\\s?[-+]?(?:3[01]|[0-2]?[0-9]))?"
); );
QMap<QChar, QString> hufftable = { QMap<QString, QString> hufftable = {
// char code weight // char code weight
// 3 bits // 3 bits
{ ' ' , "000" }, // 1300 { " " , "000" }, // 1300
{ 'E' , "001" }, // 1270.2 { "E" , "001" }, // 1270.2
// 4 bits // 4 bits
{ 'T' , "1100" }, // 905.6 { "T" , "1100" }, // 905.6
{ 'A' , "1010" }, // 816.7 { "A" , "1010" }, // 816.7
{ 'O' , "0111" }, // 750.7 { "O" , "0111" }, // 750.7
{ 'I' , "0101" }, // 696.6 { "I" , "0101" }, // 696.6
{ 'N' , "0100" }, // 674.9 { "N" , "0100" }, // 674.9
// 5 bits // 5 bits
{ 'S' , "11111" }, // 632.7 { "S" , "11111" }, // 632.7
{ 'H' , "11110" }, // 609.4 { "H" , "11110" }, // 609.4
{ 'R' , "11101" }, // 598.7 { "R" , "11101" }, // 598.7
{ 'D' , "10111" }, // 425.3 { "D" , "10111" }, // 425.3
{ 'L' , "10110" }, // 402.5 { "L" , "10110" }, // 402.5
// 6 bits // 6 bits
{ 'C' , "111001" }, // 278.2 { "C" , "111001" }, // 278.2
{ 'U' , "111000" }, // 275.8 { "U" , "111000" }, // 275.8
{ 'M' , "110111" }, // 240.6 { "M" , "110111" }, // 240.6
{ 'W' , "110110" }, // 236.0 { "W" , "110110" }, // 236.0
{ 'F' , "110100" }, // 222.8 { "F" , "110100" }, // 222.8
{ 'G' , "100111" }, // 201.5 { "G" , "100111" }, // 201.5
{ 'Q' , "100110" }, // 200 { "Q" , "100110" }, // 200
{ 'Y' , "011010" }, // 197.4 { "Y" , "011010" }, // 197.4
{ 'P' , "011001" }, // 192.9 { "P" , "011001" }, // 192.9
{ 'B' , "011000" }, // 149.2 { "B" , "011000" }, // 149.2
// 7 bits // 7 bits
{ '\\' , "0110111" }, // 100 <- escape { "\\" , "0110111" }, // 100 <- escape
{ '.' , "1000000" }, // 100 { "." , "1000000" }, // 100
{ '0' , "1000001" }, // 100 { "0" , "1000001" }, // 100
{ '1' , "1000010" }, // 100 { "1" , "1000010" }, // 100
{ '2' , "1000011" }, // 100 { "2" , "1000011" }, // 100
{ '3' , "1000100" }, // 100 { "3" , "1000100" }, // 100
{ '4' , "1000101" }, // 100 { "4" , "1000101" }, // 100
{ '5' , "1000110" }, // 100 { "5" , "1000110" }, // 100
{ '6' , "1000111" }, // 100 { "6" , "1000111" }, // 100
{ '7' , "1001000" }, // 100 { "7" , "1001000" }, // 100
{ '8' , "1001001" }, // 100 { "8" , "1001001" }, // 100
{ '9' , "1001010" }, // 100 { "9" , "1001010" }, // 100
{ '?' , "1001011" }, // 100 { "?" , "1001011" }, // 100
{ '/' , "1101010" }, // 100 { "/" , "1101010" }, // 100
{ 'V' , "0110110" }, // 97.8 { "V" , "0110110" }, // 97.8
// 8 bits // 8 bits
{ 'K' , "11010111" }, // 77.2 { "K" , "11010111" }, // 77.2
// 10 bits // 10 bits
{ 'J' , "1101011010" }, // 15.3 { "J" , "1101011010" }, // 15.3
{ 'X' , "1101011001" }, // 15.0 { "X" , "1101011001" }, // 15.0
// 11 bits // 11 bits
{ 'Z' , "11010110110" }, // 7.4 { "Z" , "11010110110" }, // 7.4
{ ':' , "11010110000" }, // 5 { ":" , "11010110000" }, // 5
// 12 bits // 12 bits
{ '+' , "110101100011" }, // 5 { "+" , "110101100011" }, // 5
{ '-' , "110101101110" }, // 5 { "-" , "110101101110" }, // 5
{ '!' , "110101101111" }, // 5 { "!" , "110101101111" }, // 5
{ '\x04' , "110101100010" }, // 1 <- eot { "\x04" , "110101100010" }, // 1 <- eot
/* /*
A-Z 0-9 Space \\ ? / : - + ! A-Z 0-9 Space \\ ? / : - + !
@ -148,7 +148,9 @@ QMap<QChar, QString> hufftable = {
}; };
/* /*
Most common trigrams: via https://www3.nd.edu/~busiforc/handouts/cryptography/Letter%20Frequencies.html#Most_common_trigrams_.28in_order.29
most common trigrams:
the = 12 bits the = 12 bits
and = 13 bits and = 13 bits
tha = 13 bits tha = 13 bits
@ -156,7 +158,7 @@ ent = 11 bits
ing = 14 bits ing = 14 bits
ion = 12 bits ion = 12 bits
tio = 12 bits tio = 12 bits
for = 15 bits ** for = 15 bits
nde = 12 bits nde = 12 bits
has = 14 bits has = 14 bits
nce = 13 bits nce = 13 bits
@ -170,15 +172,15 @@ hat = 13 bits
his = 14 bits his = 14 bits
ere = 11 bits ere = 11 bits
ter = 12 bits ter = 12 bits
was = 15 bits ** was = 15 bits
you = 16 bits ** you = 16 bits **
ith = 13 bits ith = 13 bits
ver = 15 bits ** ver = 15 bits
all = 14 bits all = 14 bits
wit = 14 bits wit = 14 bits
thi = 13 bits thi = 13 bits
quadgrams: most common quadgrams:
that = 17 bits that = 17 bits
ther = 17 bits ther = 17 bits
with = 19 bits with = 19 bits
@ -200,6 +202,17 @@ were = 17 bits
hing = 19 bits hing = 19 bits
ment = 17 bits ment = 17 bits
potential contenders:
_DE_ = 14 bits
BTU = 16 bits
... = 21 bits
599 = 21 bits
FT8 = 17 bits
BAND = 19 bits
FT8CALL = 37 bits
DIPOLE = 27 bits
VERT = 19 bits
BEAM = 19 bits
*/ */
@ -207,68 +220,70 @@ ment = 17 bits
original: Space \\ ? / : - + ! original: Space \\ ? / : - + !
needed: ^,&@#$%'"()<>|*[]{}=;_~` needed: ^,&@#$%'"()<>|*[]{}=;_~`
*/ */
// Maps an escape sequence (the escape character '\' followed by one symbol
// of the base Huffman table) to the text that sequence stands for: either a
// single character the base table cannot encode, or a frequent word/n-gram.
// The transmitted cost of an entry is the 7-bit escape code plus the code
// length of the second symbol; the "savings" notes compare that cost with
// spelling the word out using the base table.
QMap<QString, QString> huffescapes = {
    { "\\ ", "^" },
    { "\\E", "," },
    { "\\T", "&" },
    { "\\A", "@" },
    { "\\O", "#" },
    { "\\I", "$" },
    { "\\N", "%" },
    { "\\S", "\'" },
    { "\\H", "\"" },
    { "\\R", "(" },
    { "\\D", ")" },
    { "\\L", "<" },
    { "\\C", ">" },
    { "\\U", "|" },
    { "\\M", "*" },
    { "\\W", "[" },
    { "\\F", "]" },
    { "\\G", "{" },
    { "\\Q", "}" },
    { "\\Y", "=" },
    { "\\P", ";" },
    { "\\B", "_" },
    { "\\.", "~" },
    { "\\0", "`" },

    // 14 bits (7-bit escape + 7-bit symbol)
    // trigram efficiency
    { "\\1", "WAS" },      // 15 bits - 1 bit savings

    // quadgram efficiency
    { "\\2", "THAT" },     // 17 bits - 3 bit savings
    { "\\3", "THER" },     // 17 bits - 3 bit savings
    { "\\4", "WITH" },     // 19 bits - 5 bit savings
    { "\\5", "TION" },     // 16 bits - 2 bit savings
    { "\\6", "HERE" },     // 16 bits - 2 bit savings
    { "\\7", "OULD" },     // 20 bits - 6 bit savings
    { "\\8", "IGHT" },     // 19 bits - 5 bit savings
    { "\\9", "HAVE" },     // 19 bits - 5 bit savings
    { "\\?", "HICH" },     // 20 bits - 6 bit savings
    { "\\/", "WHIC" },     // 21 bits - 7 bit savings
    { "\\V", "THIS" },     // 18 bits - 4 bit savings

    // 15 bits (7-bit escape + 8-bit symbol)
    // quadgram efficiency
    { "\\K" , "FROM" },    // 21 bits - 6 bit savings

    // 17 bits (7-bit escape + 10-bit symbol)
    // quadgram efficiency
    { "\\J" , "OUGH" },    // 21 bits - 4 bit savings
    { "\\X" , "599" },     // 21 bits - 4 bit savings

#if 0
    // Unassigned escapes, kept disabled. The "" placeholders must be replaced
    // with real, distinct text before enabling these entries.

    // 18 bits
    // quadgram efficiency
    { "\\Z" , "" },
    { "\\:" , "" },

    // 19 bits
    { "\\+" , "" },
    { "\\-" , "" },
#endif

    // 19 bits (7-bit escape + 12-bit symbol)
    { "\\!" , "FT8CALL" }, // 37 bits - 18 bit savings
};
QChar ESC = '\\'; // Escape char QChar ESC = '\\'; // Escape char
@ -283,15 +298,15 @@ QMap<QString, quint32> basecalls = {
}; };
QMap<int, int> dbm2mw = { QMap<int, int> dbm2mw = {
{0 , 1}, {0 , 1}, // 1mW
{3 , 2}, {3 , 2}, // 2mW
{7 , 5}, {7 , 5}, // 5mW
{10 , 10}, {10 , 10}, // 10mW
{13 , 20}, {13 , 20}, // 20mW
{17 , 50}, {17 , 50}, // 50mW
{20 , 100}, {20 , 100}, // 100mW
{23 , 200}, {23 , 200}, // 200mW
{27 , 500}, {27 , 500}, // 500mW
{30 , 1000}, // 1W {30 , 1000}, // 1W
{33 , 2000}, // 2W {33 , 2000}, // 2W
{37 , 5000}, // 5W {37 , 5000}, // 5W
@ -305,12 +320,16 @@ QMap<int, int> dbm2mw = {
}; };
QMap<QChar, QString> initializeEscapes(QMap<QChar, QString> huff, QMap<QString, QChar> escapes){ QMap<QString, QString> initializeEscapes(QMap<QString, QString> huff, QMap<QString, QString> escapes){
QMap<QChar, QString> newhuff(huff); QMap<QString, QString> newhuff(huff);
foreach(auto escapeString, escapes.keys()){ foreach(auto escapeString, escapes.keys()){
auto ch = escapes[escapeString]; auto ch = escapes[escapeString];
auto encoded = Varicode::huffEncode(huff, escapeString); auto encoded = Varicode::huffEncode(huff, escapeString);
auto bits = Varicode::bitsListToBits(encoded); QList<QVector<bool>> e;
foreach(auto pair, encoded){
e.append(pair.second);
}
auto bits = Varicode::bitsListToBits(e);
newhuff[ch] = Varicode::bitsToStr(bits); newhuff[ch] = Varicode::bitsToStr(bits);
} }
@ -327,7 +346,7 @@ QMap<QChar, QString> initializeEscapes(QMap<QChar, QString> huff, QMap<QString,
return newhuff; return newhuff;
} }
QMap<QChar, QString> hufftableescaped = initializeEscapes(hufftable, huffescapes); QMap<QString, QString> hufftableescaped = initializeEscapes(hufftable, huffescapes);
/* /*
* UTILITIES * UTILITIES
@ -451,20 +470,56 @@ QStringList Varicode::parseGrids(const QString &input){
return grids; return grids;
} }
/**
 * Encode `text` with the symbol table `huff` (symbol text -> string of
 * '0'/'1' characters).
 *
 * Symbols may be longer than one character. At each position the longest
 * symbol that prefixes the remaining text is chosen (ties between symbols of
 * equal length are resolved in descending lexical order). Input at a position
 * where no symbol matches is skipped one character at a time.
 *
 * @param huff  symbol table to encode with
 * @param text  text to encode
 * @return one entry per emitted symbol: `first` is the number of characters
 *         of `text` the symbol consumed, `second` is the symbol's bits
 */
QList<QPair<int, QVector<bool>>> Varicode::huffEncode(const QMap<QString, QString> &huff, QString const& text){
    QList<QPair<int, QVector<bool>>> out;

    auto symbols = huff.keys();

    // An empty symbol is a prefix of everything and consumes no input, so
    // matching it would never advance the cursor below. Drop it up front.
    symbols.removeAll(QString());

    // Longest symbols first, so multi-character symbols win over their own
    // single-character prefixes.
    qSort(symbols.begin(), symbols.end(), [](QString const &a, QString const &b){
        if(a.length() != b.length()){
            return a.length() > b.length();
        }
        return b < a;
    });

    int pos = 0;
    while(pos < text.length()){
        // The unread tail is the same for every candidate symbol.
        auto const remaining = text.midRef(pos);

        bool matched = false;
        foreach(auto const &symbol, symbols){
            if(!remaining.startsWith(symbol)){
                continue;
            }

            out.append({ symbol.length(), Varicode::strToBits(huff[symbol])});
            pos += symbol.length();
            matched = true;
            break;
        }

        // Nothing in the table starts here: skip this character.
        if(!matched){
            pos++;
        }
    }

    return out;
}
QString Varicode::huffDecode(QMap<QChar, QString> const &huff, QVector<bool> const& bitvec){ QString Varicode::huffDecode(QMap<QString, QString> const &huff, QVector<bool> const& bitvec){
QString text; QString text;
QString bits = Varicode::bitsToStr(bitvec); //.mid(0, bitvec.length()-pad); QString bits = Varicode::bitsToStr(bitvec); //.mid(0, bitvec.length()-pad);
@ -510,8 +565,8 @@ QString Varicode::huffEscape(QString const &input){
return text; return text;
} }
// Returns the keys of the escaped Huffman table (hufftableescaped) as a set.
QSet<QString> Varicode::huffValidChars(){
    auto const symbols = hufftableescaped.keys();
    return QSet<QString>::fromList(symbols);
}
bool Varicode::huffShouldEscape(QString const &input){ bool Varicode::huffShouldEscape(QString const &input){
@ -1203,10 +1258,12 @@ QString Varicode::packDataMessage(const QString &input, QString * out, int *n){
int i = 0; int i = 0;
// we use the escaped table here, so that the escapes and the characters are packed together... // we use the escaped table here, so that the escapes and the characters are packed together...
foreach(auto charBits, Varicode::huffEncode(hufftableescaped, input)){ foreach(auto pair, Varicode::huffEncode(hufftableescaped, input)){
auto charN = pair.first;
auto charBits = pair.second;
if(frameHeaderBits.length() + frameDataBits.length() + charBits.length() <= 69){ if(frameHeaderBits.length() + frameDataBits.length() + charBits.length() <= 69){
frameDataBits += charBits; frameDataBits += charBits;
i++; i += charN;
continue; continue;
} }
break; break;

View File

@ -51,12 +51,12 @@ public:
static QStringList parseCallsigns(QString const &input); static QStringList parseCallsigns(QString const &input);
static QStringList parseGrids(QString const &input); static QStringList parseGrids(QString const &input);
static QList<QVector<bool>> huffEncode(const QMap<QChar, QString> &huff, QString const& text); static QList<QPair<int, QVector<bool>>> huffEncode(const QMap<QString, QString> &huff, QString const& text);
static QString huffDecode(const QMap<QChar, QString> &huff, QVector<bool> const& bitvec); static QString huffDecode(const QMap<QString, QString> &huff, QVector<bool> const& bitvec);
static QString huffUnescape(QString const &input); static QString huffUnescape(QString const &input);
static QString huffEscape(QString const &input); static QString huffEscape(QString const &input);
static QSet<QChar> huffValidChars(); static QSet<QString> huffValidChars();
static bool huffShouldEscape(QString const &input); static bool huffShouldEscape(QString const &input);
static QVector<bool> bytesToBits(char * bitvec, int n); static QVector<bool> bytesToBits(char * bitvec, int n);