Reordered varicode escapes for better efficiency

This commit is contained in:
Jordan Sherer 2018-07-30 22:28:53 -04:00
parent e1530c147c
commit 0a7c4a68de

View File

@ -155,10 +155,10 @@ the = 12 bits
and = 13 bits and = 13 bits
tha = 13 bits tha = 13 bits
ent = 11 bits ent = 11 bits
ing = 14 bits ing = 14 bits **
ion = 12 bits ion = 12 bits
tio = 12 bits tio = 12 bits
for = 15 bits for = 15 bits **
nde = 12 bits nde = 12 bits
has = 14 bits has = 14 bits
nce = 13 bits nce = 13 bits
@ -223,8 +223,10 @@ needed: ^,&@#$%'"()<>|*[]{}=;_~`
QMap<QString, QString> huffescapes = { QMap<QString, QString> huffescapes = {
// 10 bits // 10 bits
{ "\\ ", "," }, #if 0
{ "\\E", "\'" }, { "\\ ", "" },
#endif
{ "\\E", "," },
// 11 bits // 11 bits
{ "\\T", "&" }, { "\\T", "&" },
@ -234,62 +236,63 @@ QMap<QString, QString> huffescapes = {
{ "\\N", "%" }, { "\\N", "%" },
// 12 bits // 12 bits
{ "\\S", "^" }, { "\\S", "\'" },
{ "\\H", "\"" }, { "\\H", "\"" },
{ "\\R", "(" }, { "\\R", "(" },
{ "\\D", ")" }, { "\\D", ")" },
{ "\\L", "|" }, { "\\L", "|" },
// 13 bits // 13 bits
{ "\\C", "<" },
{ "\\U", ">" },
{ "\\M", "*" },
{ "\\W", "[" },
{ "\\F", "]" },
{ "\\G", "{" },
{ "\\Q", "}" },
{ "\\Y", "=" },
{ "\\P", ";" },
{ "\\B", "_" },
// 14 bits
{ "\\.", "~" },
{ "\\0", "`" },
// trigram / quadgram efficiency // trigram / quadgram efficiency
{ "\\1", "WAS" }, // 16 bits - 2 bit savings { "\\C", "YOU" }, // 16 bits - 3 bit savings
{ "\\2", "THAT" }, // 17 bits - 3 bit savings { "\\U", "THAT" }, // 17 bits - 4 bit savings
{ "\\3", "THER" }, // 17 bits - 3 bit savings { "\\M", "THER" }, // 17 bits - 4 bit savings
{ "\\4", "WITH" }, // 18 bits - 4 bit savings { "\\W", "WITH" }, // 18 bits - 5 bit savings
{ "\\5", "TION" }, // 16 bits - 2 bit savings { "\\F", "TION" }, // 16 bits - 3 bit savings
{ "\\6", "HERE" }, // 16 bits - 2 bit savings { "\\G", "HERE" }, // 16 bits - 3 bit savings
{ "\\7", "OULD" }, // 20 bits - 6 bit savings { "\\Q", "OULD" }, // 20 bits - 7 bit savings
{ "\\8", "IGHT" }, // 19 bits - 5 bit savings { "\\Y", "IGHT" }, // 19 bits - 6 bit savings
{ "\\9", "HAVE" }, // 19 bits - 5 bit savings { "\\P", "HAVE" }, // 19 bits - 6 bit savings
{ "\\?", "HICH" }, // 20 bits - 6 bit savings { "\\B", "HICH" }, // 20 bits - 7 bit savings
{ "\\/", "WHIC" }, // 21 bits - 7 bit savings
{ "\\V", "THIS" }, // 18 bits - 4 bit savings #if 0
// 14 bits
{ "\\.", "" },
{ "\\0", "" },
#endif
{ "\\1", "<" },
{ "\\2", ">" },
{ "\\3", "[" },
{ "\\4", "]" },
{ "\\5", "{" },
{ "\\6", "}" },
{ "\\7", "*" },
{ "\\8", "=" },
{ "\\9", ";" },
{ "\\?", "WHIC" }, // 21 bits - 7 bit savings
{ "\\/", "THIS" }, // 18 bits - 4 bit savings
{ "\\V", "FROM" }, // 21 bits - 7 bit savings
// 15 bits // 15 bits
// quadgram efficiency // quadgram efficiency
{ "\\K" , "FROM" }, // 21 bits - 6 bit savings { "\\K" , "OUGH" }, // 21 bits - 6 bit savings
// 17 bits // 17 bits
// quadgram efficiency
{ "\\J" , "OUGH" }, // 21 bits - 4 bit savings
{ "\\X" , "599" }, // 21 bits - 4 bit savings
// 18 bits
// quadgram efficiency
{ "\\Z" , "..." }, // 21 bits - 3 bit savings
#if 0 #if 0
{ "\\:" , "" }, { "\\J" , "" },
{ "\\X" , "" },
// 19 bits
{ "\\+" , "" },
{ "\\-" , "" },
#endif #endif
// 18 bits
{ "\\Z" , "^" },
{ "\\:" , "~" },
// 19 bits
{ "\\+" , "`" },
{ "\\-" , "_" },
// special case :)
{ "\\!" , "FT8CALL" }, // 37 bits - 18 bit savings { "\\!" , "FT8CALL" }, // 37 bits - 18 bit savings
}; };
@ -497,11 +500,9 @@ QList<QPair<int, QVector<bool>>> Varicode::huffEncode(const QMap<QString, QStrin
}); });
while(i < text.length()){ while(i < text.length()){
qDebug() << i << text.length();
bool found = false; bool found = false;
foreach(auto ch, keys){ foreach(auto ch, keys){
if(text.midRef(i).startsWith(ch)){ if(text.midRef(i).startsWith(ch)){
qDebug() << text.midRef(i) << ch;
out.append({ ch.length(), Varicode::strToBits(huff[ch])}); out.append({ ch.length(), Varicode::strToBits(huff[ch])});
i += ch.length(); i += ch.length();
found = true; found = true;
@ -514,14 +515,14 @@ QList<QPair<int, QVector<bool>>> Varicode::huffEncode(const QMap<QString, QStrin
} }
} }
/* #if 0
foreach(auto ch, text){ foreach(auto ch, text){
if(!huff.contains(ch)){ if(!huff.contains(ch)){
continue; continue;
} }
out.append(Varicode::strToBits(huff[ch])); out.append(Varicode::strToBits(huff[ch]));
} }
*/ #endif
return out; return out;
} }