| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  | /**
 | 
					
						
							| 
									
										
										
										
											2018-10-04 13:52:52 -04:00
										 |  |  |  * This file is part of JS8Call. | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |  * | 
					
						
							|  |  |  |  * This program is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  |  * it under the terms of the GNU General Public License as published by | 
					
						
							|  |  |  |  * the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  |  * (at your option) any later version. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |  * GNU General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * (C) 2018 Jordan Sherer <kn4crd@gmail.com> - All Rights Reserved | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  **/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "jsc.h"
 | 
					
						
							|  |  |  | #include "varicode.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cmath>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-08 15:36:09 -04:00
										 |  |  | #include <QDebug>
 | 
					
						
							| 
									
										
										
										
											2018-10-15 03:03:26 -04:00
										 |  |  | #include <QCache>
 | 
					
						
							|  |  |  | 
 | 
					
						
							// Memoization table used by JSC::lookup(QString, bool*): maps a looked-up
							// string to the index that lookup returned for it. Entries are only added
							// after a successful lookup, and nothing visible in this file removes them.
							// NOTE(review): no locking is visible here - confirm it is only touched
							// from a single thread.
							|  |  |  | QMap<QString, quint32> LOOKUP_CACHE; | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  | Codeword JSC::codeword(quint32 index, bool separate, quint32 bytesize, quint32 s, quint32 c){ | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |     QList<Codeword> out; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  |     quint32 v = ((index % s) << 1) + (quint32)separate; | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |     out.prepend(Varicode::intToBits(v, bytesize + 1)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     quint32 x = index / s; | 
					
						
							|  |  |  |     while(x > 0){ | 
					
						
							|  |  |  |         x -= 1; | 
					
						
							|  |  |  |         out.prepend(Varicode::intToBits((x % c) + s, bytesize)); | 
					
						
							|  |  |  |         x /= c; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Codeword word; | 
					
						
							|  |  |  |     foreach(auto w, out){ | 
					
						
							|  |  |  |         word.append(w); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return word; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  | QList<CodewordPair> JSC::compress(QString text){ | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |     QList<CodewordPair> out; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const quint32 b = 4; | 
					
						
							|  |  |  |     const quint32 s = 7; | 
					
						
							|  |  |  |     const quint32 c = pow(2, 4) - s; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-08 15:36:09 -04:00
										 |  |  |     QString space(" "); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-03 20:26:55 -04:00
										 |  |  |     QStringList words = text.split(" ", QString::KeepEmptyParts); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(int i = 0, len = words.length(); i < len; i++){ | 
					
						
							|  |  |  |         QString w = words[i]; | 
					
						
							| 
									
										
										
										
											2018-10-06 01:43:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-03 20:26:55 -04:00
										 |  |  |         bool isLastWord = (i == len - 1); | 
					
						
							|  |  |  |         bool ok = false; | 
					
						
							| 
									
										
										
										
											2018-10-08 15:36:09 -04:00
										 |  |  |         bool isSpaceCharacter = false; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-03 20:26:55 -04:00
										 |  |  |         // if this is an empty part, it should be a space, unless its the last word.
 | 
					
						
							|  |  |  |         if(w.isEmpty() && !isLastWord){ | 
					
						
							| 
									
										
										
										
											2018-10-08 15:36:09 -04:00
										 |  |  |             w = space; | 
					
						
							|  |  |  |             isSpaceCharacter = true; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-06 01:43:47 -04:00
										 |  |  |         while(!w.isEmpty()){ | 
					
						
							|  |  |  |             // this does both prefix and full match lookup
 | 
					
						
							|  |  |  |             auto index = lookup(w, &ok); | 
					
						
							|  |  |  |             if(!ok){ | 
					
						
							|  |  |  |                 break; | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2018-10-06 01:43:47 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |             auto t = JSC::map[index]; | 
					
						
							| 
									
										
										
										
											2019-10-03 20:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-09 14:12:00 -04:00
										 |  |  |             w = QString(w).mid(t.size); | 
					
						
							| 
									
										
										
										
											2018-10-06 01:43:47 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |             bool isLast = w.isEmpty(); | 
					
						
							| 
									
										
										
										
											2019-10-03 20:26:55 -04:00
										 |  |  |             bool shouldAppendSpace = isLast && !isSpaceCharacter && !isLastWord; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-08 15:36:09 -04:00
										 |  |  |             out.append({ codeword(index, shouldAppendSpace, b, s, c), (quint32)t.size + (shouldAppendSpace ? 1 : 0) /* for the space that follows */}); | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return out; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  | QString JSC::decompress(Codeword const& bitvec){ | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |     const quint32 b = 4; | 
					
						
							|  |  |  |     const quint32 s = 7; | 
					
						
							|  |  |  |     const quint32 c = pow(2, b) - s; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     QStringList out; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     quint32 base[8]; | 
					
						
							|  |  |  |     base[0] = 0; | 
					
						
							|  |  |  |     base[1] = s; | 
					
						
							|  |  |  |     base[2] = base[1] + s*c; | 
					
						
							|  |  |  |     base[3] = base[2] + s*c*c; | 
					
						
							|  |  |  |     base[4] = base[3] + s*c*c*c; | 
					
						
							|  |  |  |     base[5] = base[4] + s*c*c*c*c; | 
					
						
							|  |  |  |     base[6] = base[5] + s*c*c*c*c*c; | 
					
						
							|  |  |  |     base[7] = base[6] + s*c*c*c*c*c*c; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     QList<quint64> bytes; | 
					
						
							| 
									
										
										
										
											2019-11-25 20:13:00 -05:00
										 |  |  |     QList<quint32> separators; | 
					
						
							| 
									
										
										
										
											2018-11-03 01:14:31 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     int i = 0; | 
					
						
							|  |  |  |     int count = bitvec.count(); | 
					
						
							|  |  |  |     while(i < count){ | 
					
						
							|  |  |  |         auto b = bitvec.mid(i, 4); | 
					
						
							|  |  |  |         if(b.length() != 4){ | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         quint64 byte = Varicode::bitsToInt(b); | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |         bytes.append(byte); | 
					
						
							| 
									
										
										
										
											2018-11-03 01:14:31 -04:00
										 |  |  |         i += 4; | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if(byte < s){ | 
					
						
							| 
									
										
										
										
											2018-11-03 01:14:31 -04:00
										 |  |  |             if(count - i > 0 && bitvec.at(i)){ | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |                 separators.append(bytes.length()-1); | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2018-11-03 01:14:31 -04:00
										 |  |  |             i += 1; | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-25 20:13:00 -05:00
										 |  |  |     quint32 start = 0; | 
					
						
							|  |  |  |     while(start < (quint32)bytes.length()){ | 
					
						
							|  |  |  |         quint32 k = 0; | 
					
						
							|  |  |  |         quint32 j = 0; | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-25 20:13:00 -05:00
										 |  |  |         while(start + k < (quint32)bytes.length() && bytes[start + k] >= s){ | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |             j = j*c + (bytes[start + k] - s); | 
					
						
							|  |  |  |             k++; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2019-11-25 20:13:00 -05:00
										 |  |  |         if(j >= JSC::size){ | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-25 20:13:00 -05:00
										 |  |  |         if(start + k >= (quint32)bytes.length()){ | 
					
						
							| 
									
										
										
										
											2018-11-03 22:14:42 -04:00
										 |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |         j = j*s + bytes[start + k] + base[k]; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-25 20:13:00 -05:00
										 |  |  |         if(j >= JSC::size){ | 
					
						
							| 
									
										
										
										
											2018-11-03 22:14:42 -04:00
										 |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2019-10-03 20:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-22 01:34:57 -05:00
										 |  |  |         // map is in latin1 format, not utf-8
 | 
					
						
							|  |  |  |         auto word = QLatin1String(JSC::map[j].str); | 
					
						
							| 
									
										
										
										
											2018-10-09 14:12:00 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         out.append(word); | 
					
						
							| 
									
										
										
										
											2018-10-02 18:03:15 -04:00
										 |  |  |         if(!separators.isEmpty() && separators.first() == start + k){ | 
					
						
							| 
									
										
										
										
											2018-09-30 17:17:47 -04:00
										 |  |  |             out.append(" "); | 
					
						
							|  |  |  |             separators.removeFirst(); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         start = start + (k + 1); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return out.join(""); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-30 20:18:35 -05:00
										 |  |  | bool JSC::exists(QString w, quint32 *pIndex){ | 
					
						
							|  |  |  |     bool found = false; | 
					
						
							|  |  |  |     quint32 index = lookup(w, &found); | 
					
						
							|  |  |  |     if(pIndex) *pIndex = index; | 
					
						
							|  |  |  |     return found && JSC::map[index].size == w.length(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  | quint32 JSC::lookup(QString w, bool * ok){ | 
					
						
							| 
									
										
										
										
											2018-10-15 03:03:26 -04:00
										 |  |  |     if(LOOKUP_CACHE.contains(w)){ | 
					
						
							|  |  |  |         if(ok) *ok = true; | 
					
						
							|  |  |  |         return LOOKUP_CACHE[w]; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     bool found = false; | 
					
						
							|  |  |  |     quint32 result = lookup(w.toLatin1().data(), &found); | 
					
						
							|  |  |  |     if(found){ | 
					
						
							|  |  |  |         LOOKUP_CACHE[w] = result; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if(ok) *ok = found; | 
					
						
							|  |  |  |     return result; | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | quint32 JSC::lookup(char const* b, bool *ok){ | 
					
						
							| 
									
										
										
										
											2018-10-06 01:43:47 -04:00
										 |  |  |     quint32 index = 0; | 
					
						
							|  |  |  |     quint32 count = 0; | 
					
						
							|  |  |  |     bool found = false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // first find prefix match to jump into the list faster
 | 
					
						
							|  |  |  |     for(quint32 i = 0; i < JSC::prefixSize; i++){ | 
					
						
							|  |  |  |         // skip obvious non-prefixes...
 | 
					
						
							|  |  |  |         if(b[0] != JSC::prefix[i].str[0]){ | 
					
						
							|  |  |  |             continue; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // ok, we found one... let's end early for single char strings.
 | 
					
						
							|  |  |  |         if(JSC::prefix[i].size == 1){ | 
					
						
							|  |  |  |             if(ok) *ok = true; | 
					
						
							|  |  |  |             return JSC::list[JSC::prefix[i].index].index; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // otherwise, keep track of the first index in the list and the number of elements
 | 
					
						
							|  |  |  |         index = JSC::prefix[i].index; | 
					
						
							|  |  |  |         count = JSC::prefix[i].size; | 
					
						
							|  |  |  |         found = true; | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // no prefix found... no lookup
 | 
					
						
							|  |  |  |     if(!found){ | 
					
						
							|  |  |  |         if(ok) *ok = false; | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // now that we have the first index in the list, let's just iterate through the list, comparing words along the way
 | 
					
						
							|  |  |  |     for(quint32 i = index; i < index + count; i++){ | 
					
						
							|  |  |  |         quint32 len = JSC::list[i].size; | 
					
						
							|  |  |  |         if(strncmp(b, JSC::list[i].str, len) == 0){ | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  |             if(ok) *ok = true; | 
					
						
							| 
									
										
										
										
											2018-10-06 01:43:47 -04:00
										 |  |  |             return JSC::list[i].index; | 
					
						
							| 
									
										
										
										
											2018-10-01 09:57:37 -04:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if(ok) *ok = false; | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } |