Not as exciting as it sounds, functional loops such as .forEach() and .every() were permitted lol

So, to help maintain a regular schedule, I’ve decided I’ll make myself post some past work as well as log what I do these days.

"use strict";

const testlib = require( './testlib.js' );

// Patterns to search for. Replaced wholesale by the 'ready' handler, hence `let`.
let dna_matches = [];
// Frequency table: pattern -> number of matches counted in the current sequence.
const dna_freqs = {};

// Maps each pattern symbol to the plain nucleotides it is allowed to match.
// The four plain nucleotides map to themselves so every lookup has the same shape.
const dna_map = {
	'C': ['C'],
	'T': ['T'],
	'A': ['A'],
	'G': ['G'],
	'R': ['G', 'A'],
	'Y': ['T', 'C'],
	'K': ['G', 'T'],
	'M': ['A', 'C'],
	'S': ['G', 'C'],
	'W': ['A', 'T'],
	'B': ['G', 'T', 'C'],
	'D': ['G', 'A', 'T'],
	'H': ['A', 'C', 'T'],
	'V': ['G', 'C', 'A'],
	'N': ['A', 'G', 'C', 'T'],
};

// Most recently read nucleotide; reassigned on every 'data' event.
let last_entered = '';
// One sliding window of recent nucleotides per pattern, indexed like dna_matches.
const buffers = [];
// Position within the current sequence; incremented per 'data' event, zeroed by reset().
let offset = 0;

/**
 * Advances the sliding window for one pattern and reports a match if the
 * window held one.
 *
 * While the window is shorter than the pattern it is only filled. Once it is
 * full, the window is compared against the pattern, a match is counted and
 * reported through testlib.foundMatch, and the window then slides on by one
 * symbol (oldest out, last_entered in).
 *
 * @param {string} current - Pattern being checked; also its key in dna_freqs.
 * @param {number} index - Position of this pattern's window in buffers.
 */
function isMatch(current, index) {
	const buffer = buffers[index];

	// Still filling: nothing to compare until the window is as long as the pattern.
	if (buffer.length !== current.length) {
		buffer.push(last_entered);
		return;
	}

	const matchFound = current.split("").every((symbol, i) => {
		const seen = buffer[i];
		if (symbol === seen) {
			return true;
		}
		// A pattern symbol missing from dna_map can only match itself; treat it
		// as a mismatch instead of throwing on an undefined lookup.
		const allowed = dna_map[symbol];
		return allowed !== undefined && allowed.includes(seen);
	});

	if (matchFound) {
		dna_freqs[current]++;
		// offset is the position just past the window, so the match starts one
		// window-length earlier.
		testlib.foundMatch(current, offset - buffer.length);
	}

	// Slide the window: drop the oldest symbol, append the newest (first in, first out).
	buffer.shift();
	buffer.push(last_entered);
}

/**
 * Clears the state kept for one pattern so the next sequence starts fresh:
 * rewinds the shared offset, empties the pattern's buffer and zeroes its
 * frequency count.
 *
 * @param {string} current - Pattern whose dna_freqs entry is zeroed.
 * @param {number} index - Position of the pattern's buffer in buffers.
 */
function reset(current, index) {
	// The offset is shared by all patterns, so it is rewound on every call.
	offset = 0;
	buffers[index] = [];
	dna_freqs[current] = 0;
}

// Shared by 'end' and 'reset': check the last full buffer (it is only compared
// when the next symbol arrives), report the frequency table, then clear state.
function finishSequence() {
	dna_matches.forEach(isMatch);
	testlib.frequencyTable(dna_freqs);
	dna_matches.forEach(reset);
}

// Store the patterns, initialise their per-pattern state and start the tests.
testlib.on('ready', (patterns) => {
	dna_matches = patterns;
	dna_matches.forEach(reset);
	testlib.runTests();
});

// Record the newly read data, run every pattern against it, then advance.
testlib.on('data', (data) => {
	last_entered = data;
	dna_matches.forEach(isMatch);
	offset++;
});

testlib.on('end', () => {
	finishSequence();
});

testlib.on('reset', () => {
	finishSequence();
});

testlib.setup(3, 0);

I’ve removed the comments so I can walk through each step (and also maybe show off the fairly short solution).

So to start with, this is what we were given to work from:

"use strict";

const testlib = require( './testlib.js' );

// Once testlib hands over the patterns, print them and start the test run.
testlib.on('ready', (patterns) => {
	console.log("Patterns:", patterns);
	testlib.runTests();
});

// Echo each piece of data as it is read.
testlib.on('data', (data) => {
	console.log("<<<", data);
});

testlib.setup(1); // Runs test 1 (task1.data and task1.seq)

"use strict" enables JavaScript’s strict mode, which essentially doesn’t allow code to execute with undeclared variables being used. This is apparently used to make it easier to write more secure code and changes “bad syntax” warnings to errors.

testlib is a custom library with all the necessary functions to read files with DNA data and output them as required, with the likes of foundMatch(), frequencyTable() etc.

testlib.on portions are asynchronous code blocks, which define behaviour “on” a specific situation such as code start, reading data, being finished with a file and resetting the program to be ready for the next chunk of data.

Now onto my solution. I’ve declared a fair few variables so let’s walk through them:

  • dna_matches is an array that stores the patterns to match for.
  • dna_freqs is a dictionary that stores the frequency table for each pattern.
  • dna_map is a dictionary used to achieve the ability of allowing certain complex nucleotides to match as simple nucleotides.
    • C, T, A and G were left in as I initially thought it was a two-way matter, where simple nucleotides could indicate the presence of complex ones but later found out this wasn’t the case. So instead of refactoring the code I just left them in with themselves as their value.
  • last_entered stores the last nucleotide that was read.
  • buffers is an array of buffers, one for each pattern given. This allows checking for multiple patterns elegantly. Because each buffer shift()s its oldest element off the front and push()es the newest onto the back, these are First-In-First-Out (FIFO) queues rather than stacks.
  • offset stores the offset within the DNA sequence

So, isMatch(), which is the meat of this solution, works like so:

  1. Set matchFound to false by default
  2. Check if the buffer and its corresponding pattern are the same length
    1. If not, then just add to the buffer and do nothing, ergo return from the function
  3. If they are the same length, then we can begin pattern checking,
    1. Set matchFound to true if every following value is true, otherwise it’ll stay as false
    2. With the current pattern, split it by "" to get an array of characters, and for each character using the .every function:
    3. If the character is the same as the character of the corresponding buffer at the index of the current item in the dna_matches array, then return true.
    4. If not, then check whether the match is still valid according to dna_map, where a complex nucleotide can indicate the presence of a simpler one. Note that .every doesn’t build a temporary array: it stops and returns false as soon as one character’s check returns false, and returns true only if every check passes.
  4. If all tests pass, then increment the corresponding item in the frequency table and, using the library function foundMatch, report a match of the current pattern being checked and its offset within the entire sequence. We have to subtract the length of the current buffer/pattern from the offset to account for waiting on the buffer to be filled at the start of the code. Put simply, the offset points at the very end of the matched text, when in reality we need to report where the pattern starts.
  5. shift() the buffer to remove the oldest element and push() last_entered onto the end of the buffer

reset() simply clears out stored values within the frequency table, the buffers and the offset when we reach the End Of Line or End Of File.

Other than this, the rest of the code is very simple.

  1. ‘ready’: When the program starts, initialise the patterns to look for and run the tests.
  2. ‘data’: When data is read, set last_entered to the character read and run isMatch “forEach” pattern in dna_matches and increase the offset.
  3. ’end’: When the end of a sequence is reached, run a final isMatch as the last entered character hasn’t been checked. Report frequencies and reset everything.
  4. ‘reset’: When the end of all sequences is reached, do the same as above. (redundant code I know)

The final line simply tells the program to run test type 3 for all given data. The previous tests just incrementally worked up from only reporting frequencies to reporting matches.

And there you have it, DNA sequencing without for or while loop code blocks. Here’s the commented code for those interested, with all the typos I’d submitted them with and only realised whilst making this :P

"use strict";

const testlib = require( './testlib.js' );

// Patterns to look for. Replaced wholesale by the 'ready' handler, hence `let`.
let dna_matches = [];
// Frequency table: pattern -> number of matches counted in the current sequence.
const dna_freqs = {};

/*
 * Used to check complex nucleotides -> simple ones.
 * Each pattern symbol maps to the plain nucleotides it is allowed to match;
 * the four plain nucleotides map to themselves.
 */
const dna_map = {
	'C': ['C'],
	'T': ['T'],
	'A': ['A'],
	'G': ['G'],
	'R': ['G', 'A'],
	'Y': ['T', 'C'],
	'K': ['G', 'T'],
	'M': ['A', 'C'],
	'S': ['G', 'C'],
	'W': ['A', 'T'],
	'B': ['G', 'T', 'C'],
	'D': ['G', 'A', 'T'],
	'H': ['A', 'C', 'T'],
	'V': ['G', 'C', 'A'],
	'N': ['A', 'G', 'C', 'T'],
};

// Most recently read data; reassigned on every 'data' event.
let last_entered = '';
// One buffer of recent data per pattern, indexed like dna_matches.
const buffers = [];
// Offset within the DNA sequence; incremented per 'data' event, zeroed by reset().
let offset = 0;

/*
 * Used to fill up the buffers, shift them along and simultaneously make checks.
 * A pattern letter matches a buffer letter when the two are equal or when
 * dna_map[pattern_letter] contains the buffer letter.
 *
 * current: the pattern being checked (also its key in dna_freqs).
 * index:   position of that pattern's buffer in buffers.
 */
function isMatch(current, index) {
	const buffer = buffers[index];

	// If the buffer isn't yet the same length as the pattern, only push data.
	if (buffer.length !== current.length) {
		buffer.push(last_entered);
		return;
	}

	/*
	 * Check if every return value is true whilst iterating both the pattern and buffer.
	 * It will evaluate to true if an exact match is found or if a match is found in dna_map.
	 */
	const matchFound = current.split("").every((symbol, i) => {
		const seen = buffer[i];
		if (symbol === seen) {
			return true;
		}
		// A pattern letter missing from dna_map can only match itself; treat it
		// as a mismatch instead of throwing on an undefined lookup.
		const allowed = dna_map[symbol];
		return allowed !== undefined && allowed.includes(seen);
	});

	/*
	 * If all tests passed, then increment the corresponding part of the frequency table and
	 * report the match found. offset - buffer.length is required because offset is the
	 * position just past the buffer, whereas the match starts at the buffer's first letter.
	 */
	if (matchFound) {
		dna_freqs[current]++;
		testlib.foundMatch(current, offset - buffer.length);
	}

	/*
	 * Shift the buffer to remove the oldest element and push the latest data at the end.
	 * This makes the buffer a first-in-first-out queue.
	 */
	buffer.shift();
	buffer.push(last_entered);
}

/*
 * Resets values at each EOL and at the EOF: rewinds the shared offset, empties
 * the buffer at `index` and zeroes the frequency count stored under `current`.
 */
function reset(current, index) {
	// The offset is shared by all patterns, so it is rewound on every call.
	offset = 0;
	buffers[index] = [];
	dna_freqs[current] = 0;
}

/*
 * Shared by 'end' and 'reset'. Runs a final isMatch on everything, because a
 * full buffer is only compared when the next piece of data arrives.
 * Then reports frequencies and resets everything.
 */
function finishSequence() {
	dna_matches.forEach(isMatch);
	testlib.frequencyTable(dna_freqs);
	dna_matches.forEach(reset);
}

/*
 * Initialises data, mainly the frequency table and buffers, then starts the tests.
 */
testlib.on('ready', (patterns) => {
	dna_matches = patterns;
	dna_matches.forEach(reset);
	testlib.runTests();
});

/*
 * Stores the data read and runs isMatch on each pattern we look for. Finally the offset is incremented.
 */
testlib.on('data', (data) => {
	last_entered = data;
	dna_matches.forEach(isMatch);
	offset++;
});

/*
 * Final check, frequency report and reset when 'end' fires.
 */
testlib.on('end', () => {
	finishSequence();
});

/*
 * Same final check, frequency report and reset when 'reset' fires.
 */
testlib.on('reset', () => {
	finishSequence();
});

testlib.setup(3, 0);