const bitToNucleotide = {
"00": "A",
"01": "C",
"10": "G",
"11": "T"
};
// Lookup table: nucleotide letter -> its 2-bit code, written as a binary string.
const nucleotideToBit = {
  A: '00',
  C: '01',
  G: '10',
  T: '11',
};
/**
 * Packs a DNA string into bytes: two bits per nucleotide, four nucleotides
 * per byte, first nucleotide in the most significant bits.
 *
 * When the sequence length is not a multiple of four, the unused low bits of
 * the last byte stay zero. The returned array does not record the sequence
 * length, so callers that need the exact length must keep it themselves.
 *
 * @param {string} dnaString - Sequence whose characters all have an entry in
 *   `nucleotideToBit`.
 * @returns {Uint8Array} Packed sequence, `Math.ceil(dnaString.length / 4)`
 *   bytes long.
 * @throws {TypeError} If a character has no entry in `nucleotideToBit`.
 */
const encodeDNA = (dnaString) => {
  const byteArray = new Uint8Array(Math.ceil(dnaString.length / 4));
  for (let i = 0; i < dnaString.length; i++) {
    const bits = nucleotideToBit[dnaString[i]];
    if (bits === undefined) {
      // Without this check the text "undefined" would be spliced into the
      // bit stream and silently corrupt every following byte.
      throw new TypeError(`Invalid nucleotide "${dnaString[i]}" at position ${i}`);
    }
    // Nucleotide i lives in byte floor(i / 4), shifted left by 6, 4, 2 or 0.
    byteArray[i >> 2] |= parseInt(bits, 2) << (6 - 2 * (i & 3));
  }
  return byteArray;
};
/**
 * Unpacks a byte array holding two bits per nucleotide (four nucleotides per
 * byte, first nucleotide in the most significant bits) into a DNA string.
 *
 * @param {Uint8Array} bitArray - Packed sequence.
 * @param {number} [nucleotideCount] - How many nucleotides to decode. Defaults
 *   to every 2-bit slot in `bitArray` (`bitArray.length * 4`), which also
 *   decodes any unused slots in the last byte. Pass the original sequence
 *   length to get an exact round trip.
 * @returns {string} The decoded sequence, `nucleotideCount` characters long.
 * @throws {RangeError} If `nucleotideCount` is not an integer between 0 and
 *   `bitArray.length * 4`.
 */
const decodeDNA = (bitArray, nucleotideCount = bitArray.length * 4) => {
  const capacity = bitArray.length * 4;
  if (!Number.isInteger(nucleotideCount) || nucleotideCount < 0 || nucleotideCount > capacity) {
    throw new RangeError(
      `nucleotideCount must be an integer between 0 and ${capacity}, got ${nucleotideCount}`
    );
  }
  const nucleotides = [];
  for (let index = 0; index < nucleotideCount; index++) {
    // Nucleotide `index` lives in byte floor(index / 4), shifted by 6, 4, 2 or 0.
    const code = (bitArray[index >> 2] >> (6 - 2 * (index & 3))) & 0b11;
    nucleotides.push(bitToNucleotide[code.toString(2).padStart(2, '0')]);
  }
  return nucleotides.join('');
};
/**
 * Converts a DNA pattern into the integer whose binary digits are the
 * pattern's 2-bit nucleotide codes, first nucleotide most significant.
 *
 * @param {string} pattern - Pattern whose characters all have an entry in
 *   `nucleotideToBit`; at most 26 nucleotides.
 * @returns {number} Non-negative integer; 0 for the empty pattern.
 * @throws {RangeError} If the pattern is longer than 26 nucleotides (52 bits),
 *   the longest whose value is always an exactly representable Number.
 * @throws {TypeError} If a character has no entry in `nucleotideToBit`.
 */
const dnaPatternToBits = (pattern) => {
  const maxNucleotides = 26; // 26 * 2 = 52 bits, below Number.MAX_SAFE_INTEGER (2^53 - 1)
  if (pattern.length > maxNucleotides) {
    throw new RangeError(
      `Pattern of ${pattern.length} nucleotides exceeds the ${maxNucleotides}-nucleotide limit`
    );
  }
  let value = 0;
  for (let i = 0; i < pattern.length; i++) {
    const bits = nucleotideToBit[pattern[i]];
    if (bits === undefined) {
      throw new TypeError(`Invalid nucleotide "${pattern[i]}" at position ${i}`);
    }
    // Multiply rather than shift: `<<` would truncate the value to 32 bits.
    value = value * 4 + parseInt(bits, 2);
  }
  return value;
};
/**
 * Finds every position where `patternPrefix`, then exactly `wildcardLength`
 * arbitrary nucleotides, then `patternSuffix` occur in a packed sequence.
 *
 * The patterns are converted with `dnaPatternToBits` and compared with
 * `matchBits`, stepping one nucleotide (two bits) at a time.
 *
 * @param {Uint8Array} bitArray - Packed sequence, two bits per nucleotide.
 * @param {string} patternPrefix - Nucleotides that must start the match.
 * @param {string} patternSuffix - Nucleotides that must end the match.
 * @param {number} wildcardLength - Number of unconstrained nucleotides between
 *   prefix and suffix.
 * @param {number} [nucleotideCount] - Number of real nucleotides in
 *   `bitArray`. Defaults to every 2-bit slot (`bitArray.length * 4`), so
 *   unused slots in the last byte are searched too; pass the original
 *   sequence length to keep matches from running into them.
 * @returns {number[]} Nucleotide positions (0-based) where a match starts.
 * @throws {RangeError} If `nucleotideCount` is not an integer between 0 and
 *   `bitArray.length * 4`.
 */
const searchDNA = (
  bitArray,
  patternPrefix,
  patternSuffix,
  wildcardLength,
  nucleotideCount = bitArray.length * 4
) => {
  const capacity = bitArray.length * 4;
  if (!Number.isInteger(nucleotideCount) || nucleotideCount < 0 || nucleotideCount > capacity) {
    throw new RangeError(
      `nucleotideCount must be an integer between 0 and ${capacity}, got ${nucleotideCount}`
    );
  }
  const matches = [];
  const prefixBits = dnaPatternToBits(patternPrefix);
  const suffixBits = dnaPatternToBits(patternSuffix);
  // All lengths below are in bits: two per nucleotide.
  const prefixLength = patternPrefix.length * 2;
  const suffixLength = patternSuffix.length * 2;
  const suffixStart = prefixLength + wildcardLength * 2;
  // Last bit position at which the whole prefix + wildcard + suffix still fits.
  const lastStart = nucleotideCount * 2 - (suffixStart + suffixLength);
  for (let bitPos = 0; bitPos <= lastStart; bitPos += 2) {
    if (
      matchBits(bitArray, bitPos, prefixBits, prefixLength) &&
      matchBits(bitArray, bitPos + suffixStart, suffixBits, suffixLength)
    ) {
      matches.push(bitPos / 2); // Convert bit position to nucleotide position
    }
  }
  return matches;
};
/**
 * Tests whether the `patternLength` bits starting at `bitPos` in `bitArray`
 * (most significant bit of each byte first) equal `patternBits`.
 *
 * Bit positions past the end of `bitArray` read as 0. The comparison is exact
 * for patterns of up to 53 bits.
 *
 * @param {Uint8Array} bitArray - Packed bit data.
 * @param {number} bitPos - Index of the first bit to compare.
 * @param {number} patternBits - Pattern as a non-negative integer.
 * @param {number} patternLength - Pattern length in bits.
 * @returns {boolean} True when the extracted bits equal the pattern; always
 *   true for a zero-length pattern.
 */
const matchBits = (bitArray, bitPos, patternBits, patternLength) => {
  // An empty pattern matches at every position.
  if (patternLength === 0) {
    return true;
  }
  let extractedBits = 0;
  for (let i = 0; i < patternLength; i++) {
    const currentBitPos = bitPos + i;
    const currentByte = bitArray[Math.floor(currentBitPos / 8)];
    const bit = (currentByte >> (7 - (currentBitPos % 8))) & 1;
    // Accumulate with arithmetic, not `<< 1 |`: bitwise operators work on
    // signed 32-bit integers, so patterns of 32+ bits would wrap negative or
    // lose their high bits and compare incorrectly.
    extractedBits = extractedBits * 2 + bit;
  }
  return extractedBits === patternBits;
};
/**
 * Returns, for each match position, the nucleotides that sit between the
 * prefix and the suffix (the wildcard region).
 *
 * @param {Uint8Array} bitArray - Packed sequence, two bits per nucleotide,
 *   first nucleotide in the most significant bits of each byte.
 * @param {number[]} matches - Nucleotide positions where a match starts.
 * @param {number} prefixLength - Prefix length in nucleotides.
 * @param {number} wildcardLength - Wildcard length in nucleotides.
 * @returns {string[]} One wildcard string per entry of `matches`, in order.
 */
const extractWildcards = (bitArray, matches, prefixLength, wildcardLength) => {
  const wildcards = [];
  for (const match of matches) {
    // Work in nucleotide indices throughout so bit and nucleotide units are
    // never mixed; the wildcard starts right after the prefix.
    const firstNucleotide = match + prefixLength;
    let wildcardDNA = "";
    for (let offset = 0; offset < wildcardLength; offset++) {
      const index = firstNucleotide + offset;
      // Nucleotide `index` lives in byte floor(index / 4), shifted by 6, 4, 2 or 0.
      const code = (bitArray[index >> 2] >> (6 - 2 * (index & 3))) & 0b11;
      wildcardDNA += bitToNucleotide[code.toString(2).padStart(2, '0')];
    }
    wildcards.push(wildcardDNA);
  }
  return wildcards;
};
// Example usage: encode a sample sequence, decode it back, then search it for
// "CAA" + 17 arbitrary nucleotides + "AG" and print what the wildcard matched.
const patternPrefix = 'CAA';
const patternSuffix = 'AG';
const wildcardLength = 17; // in nucleotides (17 nucleotides = 34 bits)

const dnaString = "AACAAAAAAAAAAAAAAAAAAAAGG";
const bitEncodedDNA = encodeDNA(dnaString);
const decodedDNA = decodeDNA(bitEncodedDNA);
console.log('Decoded DNA Sequence:', decodedDNA);

const matches = searchDNA(
  bitEncodedDNA,
  patternPrefix,
  patternSuffix,
  wildcardLength
);
console.log("Matches found at nucleotide positions:", matches);

const wildcards = extractWildcards(
  bitEncodedDNA,
  matches,
  patternPrefix.length,
  wildcardLength
);
console.log("Wildcard sequences found:", wildcards);