QuEST_cpu.c
Go to the documentation of this file.
1 // Distributed under MIT licence. See https://github.com/QuEST-Kit/QuEST/blob/master/LICENCE.txt for details
2 
13 # include "QuEST.h"
14 # include "QuEST_internal.h"
15 # include "QuEST_precision.h"
16 # include "mt19937ar.h"
17 
18 # include "QuEST_cpu_internal.h"
19 
20 # include <math.h>
21 # include <stdio.h>
22 # include <stdlib.h>
23 # include <stdint.h>
24 # include <assert.h>
25 
26 # ifdef _OPENMP
27 # include <omp.h>
28 # endif
29 
30 
31 
32 /*
33  * overloads for consistent API with GPU
34  */
35 
36 void copyStateToGPU(Qureg qureg) {
37 }
38 
39 void copyStateFromGPU(Qureg qureg) {
40 }
41 
42 
43 
44 /*
45  * state vector and density matrix operations
46  */
47 
48 void densmatr_oneQubitDegradeOffDiagonal(Qureg qureg, int targetQubit, qreal retain){
49  long long int numTasks = qureg.numAmpsPerChunk;
50  long long int innerMask = 1LL << targetQubit;
51  long long int outerMask = 1LL << (targetQubit + (qureg.numQubitsRepresented));
52 
53  long long int thisTask;
54  long long int thisPattern;
55  long long int totMask = innerMask|outerMask;
56 
57 # ifdef _OPENMP
58 # pragma omp parallel \
59  default (none) \
60  shared (innerMask,outerMask,totMask,qureg,retain,numTasks, targetQubit) \
61  private (thisTask,thisPattern)
62 # endif
63  {
64 # ifdef _OPENMP
65 # pragma omp for schedule (static)
66 # endif
67  for (thisTask=0; thisTask<numTasks; thisTask++){
68  thisPattern = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMask;
69  if ((thisPattern==innerMask) || (thisPattern==outerMask)){
70  // do dephase
71  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
72  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask];
73  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask];
74  }
75  }
76  }
77 }
78 
79 void densmatr_mixDephasing(Qureg qureg, int targetQubit, qreal dephase) {
80  qreal retain=1-dephase;
81  densmatr_oneQubitDegradeOffDiagonal(qureg, targetQubit, retain);
82 }
83 
84 void densmatr_mixTwoQubitDephasing(Qureg qureg, int qubit1, int qubit2, qreal dephase) {
85  qreal retain=1-dephase;
86 
87  long long int numTasks = qureg.numAmpsPerChunk;
88  long long int innerMaskQubit1 = 1LL << qubit1;
89  long long int outerMaskQubit1 = 1LL << (qubit1 + (qureg.numQubitsRepresented));
90  long long int innerMaskQubit2 = 1LL << qubit2;
91  long long int outerMaskQubit2 = 1LL << (qubit2 + (qureg.numQubitsRepresented));
92  long long int totMaskQubit1 = innerMaskQubit1|outerMaskQubit1;
93  long long int totMaskQubit2 = innerMaskQubit2|outerMaskQubit2;
94 
95  long long int thisTask;
96  long long int thisPatternQubit1, thisPatternQubit2;
97 
98 # ifdef _OPENMP
99 # pragma omp parallel \
100  default (none) \
101  shared (innerMaskQubit1,outerMaskQubit1,totMaskQubit1,innerMaskQubit2,outerMaskQubit2, \
102  totMaskQubit2,qureg,retain,numTasks) \
103  private (thisTask,thisPatternQubit1,thisPatternQubit2)
104 # endif
105  {
106 # ifdef _OPENMP
107 # pragma omp for schedule (static)
108 # endif
109  for (thisTask=0; thisTask<numTasks; thisTask++){
110  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
111  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
112 
113  // any mismatch |...0...><...1...| etc
114  if ( (thisPatternQubit1==innerMaskQubit1) || (thisPatternQubit1==outerMaskQubit1) ||
115  (thisPatternQubit2==innerMaskQubit2) || (thisPatternQubit2==outerMaskQubit2) ){
116  // do dephase
117  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
118  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask];
119  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask];
120  }
121  }
122  }
123 }
124 
125 void densmatr_mixDepolarisingLocal(Qureg qureg, int targetQubit, qreal depolLevel) {
126  qreal retain=1-depolLevel;
127 
128  long long int numTasks = qureg.numAmpsPerChunk;
129  long long int innerMask = 1LL << targetQubit;
130  long long int outerMask = 1LL << (targetQubit + (qureg.numQubitsRepresented));
131  long long int totMask = innerMask|outerMask;
132 
133  long long int thisTask;
134  long long int partner;
135  long long int thisPattern;
136 
137  qreal realAv, imagAv;
138 
139 # ifdef _OPENMP
140 # pragma omp parallel \
141  default (none) \
142  shared (innerMask,outerMask,totMask,qureg,retain,depolLevel,numTasks) \
143  private (thisTask,partner,thisPattern,realAv,imagAv)
144 # endif
145  {
146 # ifdef _OPENMP
147 # pragma omp for schedule (static)
148 # endif
149  for (thisTask=0; thisTask<numTasks; thisTask++){
150  thisPattern = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMask;
151  if ((thisPattern==innerMask) || (thisPattern==outerMask)){
152  // do dephase
153  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
154  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask];
155  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask];
156  } else {
157  if ((thisTask&totMask)==0){ //this element relates to targetQubit in state 0
158  // do depolarise
159  partner = thisTask | totMask;
160  realAv = (qureg.stateVec.real[thisTask] + qureg.stateVec.real[partner]) /2 ;
161  imagAv = (qureg.stateVec.imag[thisTask] + qureg.stateVec.imag[partner]) /2 ;
162 
163  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask] + depolLevel*realAv;
164  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask] + depolLevel*imagAv;
165 
166  qureg.stateVec.real[partner] = retain*qureg.stateVec.real[partner] + depolLevel*realAv;
167  qureg.stateVec.imag[partner] = retain*qureg.stateVec.imag[partner] + depolLevel*imagAv;
168  }
169  }
170  }
171  }
172 }
173 
174 void densmatr_mixDampingLocal(Qureg qureg, int targetQubit, qreal damping) {
175  qreal retain=1-damping;
176  qreal dephase=sqrt(retain);
177 
178  long long int numTasks = qureg.numAmpsPerChunk;
179  long long int innerMask = 1LL << targetQubit;
180  long long int outerMask = 1LL << (targetQubit + (qureg.numQubitsRepresented));
181  long long int totMask = innerMask|outerMask;
182 
183  long long int thisTask;
184  long long int partner;
185  long long int thisPattern;
186 
187  //qreal realAv, imagAv;
188 
189 # ifdef _OPENMP
190 # pragma omp parallel \
191  default (none) \
192  shared (innerMask,outerMask,totMask,qureg,retain,damping,dephase,numTasks) \
193  private (thisTask,partner,thisPattern)
194 # endif
195  {
196 # ifdef _OPENMP
197 # pragma omp for schedule (static)
198 # endif
199  for (thisTask=0; thisTask<numTasks; thisTask++){
200  thisPattern = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMask;
201  if ((thisPattern==innerMask) || (thisPattern==outerMask)){
202  // do dephase
203  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
204  qureg.stateVec.real[thisTask] = dephase*qureg.stateVec.real[thisTask];
205  qureg.stateVec.imag[thisTask] = dephase*qureg.stateVec.imag[thisTask];
206  } else {
207  if ((thisTask&totMask)==0){ //this element relates to targetQubit in state 0
208  // do depolarise
209  partner = thisTask | totMask;
210  //realAv = (qureg.stateVec.real[thisTask] + qureg.stateVec.real[partner]) /2 ;
211  //imagAv = (qureg.stateVec.imag[thisTask] + qureg.stateVec.imag[partner]) /2 ;
212 
213  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask] + damping*qureg.stateVec.real[partner];
214  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask] + damping*qureg.stateVec.imag[partner];
215 
216  qureg.stateVec.real[partner] = retain*qureg.stateVec.real[partner];
217  qureg.stateVec.imag[partner] = retain*qureg.stateVec.imag[partner];
218  }
219  }
220  }
221  }
222 }
223 
224 void densmatr_mixDepolarisingDistributed(Qureg qureg, int targetQubit, qreal depolLevel) {
225 
226  // first do dephase part.
227  // TODO -- this might be more efficient to do at the same time as the depolarise if we move to
228  // iterating over all elements in the state vector for the purpose of vectorisation
229  // TODO -- if we keep this split, move this function to densmatr_mixDepolarising()
230  densmatr_mixDephasing(qureg, targetQubit, depolLevel);
231 
232  long long int sizeInnerBlock, sizeInnerHalfBlock;
233  long long int sizeOuterColumn, sizeOuterHalfColumn;
234  long long int thisInnerBlock, // current block
235  thisOuterColumn, // current column in density matrix
236  thisIndex, // current index in (density matrix representation) state vector
237  thisIndexInOuterColumn,
238  thisIndexInInnerBlock;
239  int outerBit;
240 
241  long long int thisTask;
242  long long int numTasks=qureg.numAmpsPerChunk>>1;
243 
244  // set dimensions
245  sizeInnerHalfBlock = 1LL << targetQubit;
246  sizeInnerBlock = 2LL * sizeInnerHalfBlock;
247  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
248  sizeOuterHalfColumn = sizeOuterColumn >> 1;
249 
250 # ifdef _OPENMP
251 # pragma omp parallel \
252  default (none) \
253  shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
254  qureg,depolLevel,numTasks,targetQubit) \
255  private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
256  thisIndexInInnerBlock,outerBit)
257 # endif
258  {
259 # ifdef _OPENMP
260 # pragma omp for schedule (static)
261 # endif
262  // thisTask iterates over half the elements in this process' chunk of the density matrix
263  // treat this as iterating over all columns, then iterating over half the values
264  // within one column.
265  // If this function has been called, this process' chunk contains half an
266  // outer block or less
267  for (thisTask=0; thisTask<numTasks; thisTask++) {
268  // we want to process all columns in the density matrix,
269  // updating the values for half of each column (one half of each inner block)
270  thisOuterColumn = thisTask / sizeOuterHalfColumn;
271  thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1); // thisTask % sizeOuterHalfColumn
272  thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
273  // get index in state vector corresponding to upper inner block
274  thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1); // thisTask % sizeInnerHalfBlock
275  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
276  + thisIndexInInnerBlock;
277  // check if we are in the upper or lower half of an outer block
278  outerBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
279  // if we are in the lower half of an outer block, shift to be in the lower half
280  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
281  thisIndex += outerBit*(sizeInnerHalfBlock);
282 
283  // NOTE: at this point thisIndex should be the index of the element we want to
284  // dephase in the chunk of the state vector on this process, in the
285  // density matrix representation.
286  // thisTask is the index of the pair element in pairStateVec
287 
288 
289  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
290  // + pair[thisTask])/2
291  qureg.stateVec.real[thisIndex] = (1-depolLevel)*qureg.stateVec.real[thisIndex] +
292  depolLevel*(qureg.stateVec.real[thisIndex] + qureg.pairStateVec.real[thisTask])/2;
293 
294  qureg.stateVec.imag[thisIndex] = (1-depolLevel)*qureg.stateVec.imag[thisIndex] +
295  depolLevel*(qureg.stateVec.imag[thisIndex] + qureg.pairStateVec.imag[thisTask])/2;
296  }
297  }
298 }
299 
300 void densmatr_mixDampingDistributed(Qureg qureg, int targetQubit, qreal damping) {
301  qreal retain=1-damping;
302  qreal dephase=sqrt(1-damping);
303 
304  // multiply the off-diagonal (|0><1| and |1><0|) terms by sqrt(1-damping)
305  densmatr_oneQubitDegradeOffDiagonal(qureg, targetQubit, dephase);
306 
307  // below, we modify the diagonals terms which require |1><1| to |0><0| communication
308 
309  long long int sizeInnerBlock, sizeInnerHalfBlock;
310  long long int sizeOuterColumn, sizeOuterHalfColumn;
311  long long int thisInnerBlock, // current block
312  thisOuterColumn, // current column in density matrix
313  thisIndex, // current index in (density matrix representation) state vector
314  thisIndexInOuterColumn,
315  thisIndexInInnerBlock;
316  int outerBit;
317  int stateBit;
318 
319  long long int thisTask;
320  long long int numTasks=qureg.numAmpsPerChunk>>1;
321 
322  // set dimensions
323  sizeInnerHalfBlock = 1LL << targetQubit;
324  sizeInnerBlock = 2LL * sizeInnerHalfBlock;
325  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
326  sizeOuterHalfColumn = sizeOuterColumn >> 1;
327 
328 # ifdef _OPENMP
329 # pragma omp parallel \
330  default (none) \
331  shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
332  qureg,damping, retain, dephase, numTasks,targetQubit) \
333  private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
334  thisIndexInInnerBlock,outerBit, stateBit)
335 # endif
336  {
337 # ifdef _OPENMP
338 # pragma omp for schedule (static)
339 # endif
340  // thisTask iterates over half the elements in this process' chunk of the density matrix
341  // treat this as iterating over all columns, then iterating over half the values
342  // within one column.
343  // If this function has been called, this process' chunk contains half an
344  // outer block or less
345  for (thisTask=0; thisTask<numTasks; thisTask++) {
346  // we want to process all columns in the density matrix,
347  // updating the values for half of each column (one half of each inner block)
348  thisOuterColumn = thisTask / sizeOuterHalfColumn;
349  thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1); // thisTask % sizeOuterHalfColumn
350  thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
351  // get index in state vector corresponding to upper inner block
352  thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1); // thisTask % sizeInnerHalfBlock
353  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
354  + thisIndexInInnerBlock;
355  // check if we are in the upper or lower half of an outer block
356  outerBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
357  // if we are in the lower half of an outer block, shift to be in the lower half
358  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
359  thisIndex += outerBit*(sizeInnerHalfBlock);
360 
361  // NOTE: at this point thisIndex should be the index of the element we want to
362  // dephase in the chunk of the state vector on this process, in the
363  // density matrix representation.
364  // thisTask is the index of the pair element in pairStateVec
365 
366  // Extract state bit, is 0 if thisIndex corresponds to a state with 0 in the target qubit
367  // and is 1 if thisIndex corresponds to a state with 1 in the target qubit
368  stateBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId));
369 
370  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
371  // + pair[thisTask])/2
372  if(stateBit == 0){
373  qureg.stateVec.real[thisIndex] = qureg.stateVec.real[thisIndex] +
374  damping*( qureg.pairStateVec.real[thisTask]);
375 
376  qureg.stateVec.imag[thisIndex] = qureg.stateVec.imag[thisIndex] +
377  damping*( qureg.pairStateVec.imag[thisTask]);
378  } else{
379  qureg.stateVec.real[thisIndex] = retain*qureg.stateVec.real[thisIndex];
380 
381  qureg.stateVec.imag[thisIndex] = retain*qureg.stateVec.imag[thisIndex];
382  }
383  }
384  }
385 }
386 
387 void densmatr_mixTwoQubitDepolarisingLocal(Qureg qureg, int qubit1, int qubit2, qreal delta, qreal gamma) {
388  long long int numTasks = qureg.numAmpsPerChunk;
389  long long int innerMaskQubit1 = 1LL << qubit1;
390  long long int outerMaskQubit1= 1LL << (qubit1 + qureg.numQubitsRepresented);
391  long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
392  long long int innerMaskQubit2 = 1LL << qubit2;
393  long long int outerMaskQubit2 = 1LL << (qubit2 + qureg.numQubitsRepresented);
394  long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
395 
396  long long int thisTask;
397  long long int partner;
398  long long int thisPatternQubit1, thisPatternQubit2;
399 
400  qreal real00, imag00;
401 
402 # ifdef _OPENMP
403 # pragma omp parallel \
404  default (none) \
405  shared (totMaskQubit1,totMaskQubit2,qureg,delta,gamma,numTasks) \
406  private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
407 # endif
408  {
409 # ifdef _OPENMP
410 # pragma omp for schedule (static)
411 # endif
412  //--------------------------------------- STEP ONE ---------------------
413  for (thisTask=0; thisTask<numTasks; thisTask++){
414  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
415  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
416  if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
417  || (thisPatternQubit2==totMaskQubit2))){
418  //this element of form |...X...0...><...X...0...| for X either 0 or 1.
419  partner = thisTask | totMaskQubit1;
420  real00 = qureg.stateVec.real[thisTask];
421  imag00 = qureg.stateVec.imag[thisTask];
422 
423  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask]
424  + delta*qureg.stateVec.real[partner];
425  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask]
426  + delta*qureg.stateVec.imag[partner];
427 
428  qureg.stateVec.real[partner] = qureg.stateVec.real[partner] + delta*real00;
429  qureg.stateVec.imag[partner] = qureg.stateVec.imag[partner] + delta*imag00;
430 
431  }
432  }
433 # ifdef _OPENMP
434 # pragma omp for schedule (static)
435 # endif
436  //--------------------------------------- STEP TWO ---------------------
437  for (thisTask=0; thisTask<numTasks; thisTask++){
438  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
439  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
440  if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
441  || (thisPatternQubit1==totMaskQubit1))){
442  //this element of form |...0...X...><...0...X...| for X either 0 or 1.
443  partner = thisTask | totMaskQubit2;
444  real00 = qureg.stateVec.real[thisTask];
445  imag00 = qureg.stateVec.imag[thisTask];
446 
447  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask]
448  + delta*qureg.stateVec.real[partner];
449  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask]
450  + delta*qureg.stateVec.imag[partner];
451 
452  qureg.stateVec.real[partner] = qureg.stateVec.real[partner] + delta*real00;
453  qureg.stateVec.imag[partner] = qureg.stateVec.imag[partner] + delta*imag00;
454 
455  }
456  }
457 
458 # ifdef _OPENMP
459 # pragma omp for schedule (static)
460 # endif
461  //--------------------------------------- STEP THREE ---------------------
462  for (thisTask=0; thisTask<numTasks; thisTask++){
463  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
464  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
465  if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
466  || (thisPatternQubit1==totMaskQubit1))){
467  //this element of form |...0...X...><...0...X...| for X either 0 or 1.
468  partner = thisTask | totMaskQubit2;
469  partner = partner ^ totMaskQubit1;
470  real00 = qureg.stateVec.real[thisTask];
471  imag00 = qureg.stateVec.imag[thisTask];
472 
473  qureg.stateVec.real[thisTask] = gamma * (qureg.stateVec.real[thisTask]
474  + delta*qureg.stateVec.real[partner]);
475  qureg.stateVec.imag[thisTask] = gamma * (qureg.stateVec.imag[thisTask]
476  + delta*qureg.stateVec.imag[partner]);
477 
478  qureg.stateVec.real[partner] = gamma * (qureg.stateVec.real[partner]
479  + delta*real00);
480  qureg.stateVec.imag[partner] = gamma * (qureg.stateVec.imag[partner]
481  + delta*imag00);
482 
483  }
484  }
485  }
486 }
487 
488 void densmatr_mixTwoQubitDepolarisingLocalPart1(Qureg qureg, int qubit1, int qubit2, qreal delta) {
489  long long int numTasks = qureg.numAmpsPerChunk;
490  long long int innerMaskQubit1 = 1LL << qubit1;
491  long long int outerMaskQubit1= 1LL << (qubit1 + qureg.numQubitsRepresented);
492  long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
493  long long int innerMaskQubit2 = 1LL << qubit2;
494  long long int outerMaskQubit2 = 1LL << (qubit2 + qureg.numQubitsRepresented);
495  long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
496  // correct for being in a particular chunk
497  //totMaskQubit2 = totMaskQubit2&(qureg.numAmpsPerChunk-1); // totMaskQubit2 % numAmpsPerChunk
498 
499 
500  long long int thisTask;
501  long long int partner;
502  long long int thisPatternQubit1, thisPatternQubit2;
503 
504  qreal real00, imag00;
505 
506 # ifdef _OPENMP
507 # pragma omp parallel \
508  default (none) \
509  shared (totMaskQubit1,totMaskQubit2,qureg,delta,numTasks) \
510  private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
511 # endif
512  {
513 
514 # ifdef _OPENMP
515 # pragma omp for schedule (static)
516 # endif
517  //--------------------------------------- STEP ONE ---------------------
518  for (thisTask=0; thisTask<numTasks; thisTask ++){
519  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
520  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
521  if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
522  || (thisPatternQubit2==totMaskQubit2))){
523  //this element of form |...X...0...><...X...0...| for X either 0 or 1.
524  partner = thisTask | totMaskQubit1;
525  real00 = qureg.stateVec.real[thisTask];
526  imag00 = qureg.stateVec.imag[thisTask];
527 
528  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask]
529  + delta*qureg.stateVec.real[partner];
530  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask]
531  + delta*qureg.stateVec.imag[partner];
532 
533  qureg.stateVec.real[partner] = qureg.stateVec.real[partner] + delta*real00;
534  qureg.stateVec.imag[partner] = qureg.stateVec.imag[partner] + delta*imag00;
535 
536  }
537  }
538  }
539 }
540 
        // NOTE(review): the line that opens this definition (return type, function name and
        // the leading parameters) is not part of this listing. The body uses `qureg` and
        // `targetQubit`, so both must be declared on that missing line.
        //
        // What the visible body does: runs numAmpsPerChunk/4 tasks; task t selects one
        // element `thisIndex` of qureg.stateVec and overwrites it with
        //     gamma * (state[thisIndex] + delta * pair[t])
        // where pair is qureg.pairStateVec (only read here).
        //
        // NOTE(review): sizeInnerQuarterBlockQ2 is (1LL << qubit2) >> 1, which is 0 when
        // qubit2 == 0, and it is used as a divisor below. Presumably callers guarantee
        // qubit2 > 0 -- confirm.
        int qubit2, qreal delta, qreal gamma) {

    long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
    long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
    long long int sizeOuterColumn, sizeOuterQuarterColumn;
    long long int thisInnerBlockQ2,
         thisOuterColumn, // current column in density matrix
         thisIndex,    // current index in (density matrix representation) state vector
         thisIndexInOuterColumn,
         thisIndexInInnerBlockQ1,
         thisIndexInInnerBlockQ2,
         thisInnerBlockQ1InInnerBlockQ2;
    int outerBitQ1, outerBitQ2;

    long long int thisTask;
    // one task per four amplitudes of this chunk
    long long int numTasks=qureg.numAmpsPerChunk>>2;

    // set dimensions
    sizeInnerHalfBlockQ1 = 1LL << targetQubit;
    sizeInnerHalfBlockQ2 = 1LL << qubit2;
    sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
    sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
    sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
    sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
    sizeOuterQuarterColumn = sizeOuterColumn >> 2;

# ifdef _OPENMP
# pragma omp parallel \
    default (none) \
    shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
            sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma,numTasks,targetQubit,qubit2) \
    private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
            thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
            thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
# endif
    {
# ifdef _OPENMP
# pragma omp for schedule (static)
# endif
        // thisTask iterates over a quarter of the elements in this process' chunk of the
        // density matrix: every column is visited, and a quarter of the values within
        // each column are updated.
        // (Original author's note: if this function has been called, this process' chunk
        // contains half an outer block or less.)
        for (thisTask=0; thisTask<numTasks; thisTask++) {
            // decompose the task number into a column and positions inside the
            // nested Q2 / Q1 blocks of that column
            thisOuterColumn = thisTask / sizeOuterQuarterColumn;
            // thisTask % sizeOuterQuarterColumn
            thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
            thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
            // thisTask % sizeInnerQuarterBlockQ2;
            thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
            thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
            // thisTask % sizeInnerHalfBlockQ1;
            thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);

            // get index in state vector corresponding to upper inner block
            thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
                + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;

            // bit targetQubit of the (offset) index shifted down by numQubitsRepresented:
            // tells whether we are in the upper or lower half of an outer block for Q1
            outerBitQ1 = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
            // if we are in the lower half of an outer block, shift to be in the lower half
            // of the inner Q1 block as well
            thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);

            // same test for Q2, evaluated on the index already adjusted for Q1
            outerBitQ2 = extractBit(qubit2, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
            // if we are in the lower half of an outer block, shift by 2*sizeInnerQuarterBlockQ2
            // to be in the lower half of the inner Q2 block as well
            thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);

            // NOTE: at this point thisIndex should be the index of the element we want to
            // update in the chunk of the state vector on this process, in the
            // density matrix representation.
            // thisTask is the index of the pair element in pairStateVec


            // state[thisIndex] = gamma*(state[thisIndex] + delta*pair[thisTask])
            // NOTE: must set gamma=1 if using this function for steps 1 or 2
            qureg.stateVec.real[thisIndex] = gamma*(qureg.stateVec.real[thisIndex] +
                    delta*qureg.pairStateVec.real[thisTask]);
            qureg.stateVec.imag[thisIndex] = gamma*(qureg.stateVec.imag[thisIndex] +
                    delta*qureg.pairStateVec.imag[thisTask]);
        }
    }
}
631 
        // NOTE(review): the line that opens this definition (return type, function name and
        // the leading parameters) is not part of this listing. The body uses `qureg` and
        // `targetQubit`, so both must be declared on that missing line.
        //
        // What the visible body does: runs numAmpsPerChunk/4 tasks; task t selects one
        // element `thisIndex` of qureg.stateVec and overwrites it with
        //     gamma * (state[thisIndex] + delta * pair[p])
        // where pair is qureg.pairStateVec (only read here) and p is t shifted up or down
        // by sizeInnerHalfBlockQ1*sizeOuterQuarterColumn depending on outerBitQ1.
        //
        // NOTE(review): sizeInnerQuarterBlockQ2 is (1LL << qubit2) >> 1, which is 0 when
        // qubit2 == 0, and it is used as a divisor below. Presumably callers guarantee
        // qubit2 > 0 -- confirm.
        int qubit2, qreal delta, qreal gamma) {

    long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
    long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
    long long int sizeOuterColumn, sizeOuterQuarterColumn;
    long long int thisInnerBlockQ2,
         thisOuterColumn, // current column in density matrix
         thisIndex,    // current index in (density matrix representation) state vector
         thisIndexInPairVector, // index of the matching element in pairStateVec
         thisIndexInOuterColumn,
         thisIndexInInnerBlockQ1,
         thisIndexInInnerBlockQ2,
         thisInnerBlockQ1InInnerBlockQ2;
    int outerBitQ1, outerBitQ2;

    long long int thisTask;
    // one task per four amplitudes of this chunk
    long long int numTasks=qureg.numAmpsPerChunk>>2;

    // set dimensions
    sizeInnerHalfBlockQ1 = 1LL << targetQubit;
    sizeInnerHalfBlockQ2 = 1LL << qubit2;
    sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
    sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
    sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
    sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
    sizeOuterQuarterColumn = sizeOuterColumn >> 2;

# ifdef _OPENMP
# pragma omp parallel \
    default (none) \
    shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
            sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma, numTasks,targetQubit,qubit2) \
    private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
            thisOuterColumn,thisIndex,thisIndexInPairVector,thisIndexInOuterColumn, \
            thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
# endif
    {
# ifdef _OPENMP
# pragma omp for schedule (static)
# endif
        // thisTask iterates over a quarter of the elements in this process' chunk of the
        // density matrix: every column is visited, and a quarter of the values within
        // each column are updated.
        // (Original author's note: if this function has been called, this process' chunk
        // contains half an outer block or less.)
        for (thisTask=0; thisTask<numTasks; thisTask++) {
            // decompose the task number into a column and positions inside the
            // nested Q2 / Q1 blocks of that column
            thisOuterColumn = thisTask / sizeOuterQuarterColumn;
            // thisTask % sizeOuterQuarterColumn
            thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
            thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
            // thisTask % sizeInnerQuarterBlockQ2;
            thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
            thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
            // thisTask % sizeInnerHalfBlockQ1;
            thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);

            // get index in state vector corresponding to upper inner block
            thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
                + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;

            // bit targetQubit of the (offset) index shifted down by numQubitsRepresented:
            // tells whether we are in the upper or lower half of an outer block for Q1
            outerBitQ1 = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
            // if we are in the lower half of an outer block, shift to be in the lower half
            // of the inner Q1 block as well
            thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);

            // For part 3 we need to match elements such that (my Q1 != pair Q1) AND (my Q2 != pair Q2)
            // Find correct index in pairStateVector:
            //   outerBitQ1 == 0  ->  thisTask + sizeInnerHalfBlockQ1*sizeOuterQuarterColumn
            //   outerBitQ1 == 1  ->  thisTask - sizeInnerHalfBlockQ1*sizeOuterQuarterColumn
            thisIndexInPairVector = thisTask + (1-outerBitQ1)*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn -
                outerBitQ1*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn;

            // same upper/lower test for Q2, evaluated on the index already adjusted for Q1
            outerBitQ2 = extractBit(qubit2, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
            // if we are in the lower half of an outer block, shift by 2*sizeInnerQuarterBlockQ2
            // to be in the lower half of the inner Q2 block as well
            thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);


            // NOTE: at this point thisIndex should be the index of the element we want to
            // update in the chunk of the state vector on this process, in the
            // density matrix representation.


            // state[thisIndex] = gamma*(state[thisIndex] + delta*pair[thisIndexInPairVector])
            qureg.stateVec.real[thisIndex] = gamma*(qureg.stateVec.real[thisIndex] +
                    delta*qureg.pairStateVec.real[thisIndexInPairVector]);

            qureg.stateVec.imag[thisIndex] = gamma*(qureg.stateVec.imag[thisIndex] +
                    delta*qureg.pairStateVec.imag[thisIndexInPairVector]);
        }
    }

}
731 
732 
/* Without nested parallelisation, only the outermost loops that call the functions below are parallelised */
734 void zeroSomeAmps(Qureg qureg, long long int startInd, long long int numAmps) {
735  long long int i;
736 # ifdef _OPENMP
737 # pragma omp parallel for schedule (static)
738 # endif
739  for (i=startInd; i < startInd+numAmps; i++) {
740  qureg.stateVec.real[i] = 0;
741  qureg.stateVec.imag[i] = 0;
742  }
743 }
744 void normaliseSomeAmps(Qureg qureg, qreal norm, long long int startInd, long long int numAmps) {
745  long long int i;
746 # ifdef _OPENMP
747 # pragma omp parallel for schedule (static)
748 # endif
749  for (i=startInd; i < startInd+numAmps; i++) {
750  qureg.stateVec.real[i] /= norm;
751  qureg.stateVec.imag[i] /= norm;
752  }
753 }
755  Qureg qureg, qreal norm, int normFirst,
756  long long int startAmpInd, long long int numAmps, long long int blockSize
757 ) {
758  long long int numDubBlocks = numAmps / (2*blockSize);
759  long long int blockStartInd;
760 
761  if (normFirst) {
762  long long int dubBlockInd;
763 # ifdef _OPENMP
764 # pragma omp parallel for schedule (static) private (blockStartInd)
765 # endif
766  for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
767  blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
768  normaliseSomeAmps(qureg, norm, blockStartInd, blockSize); // |0><0|
769  zeroSomeAmps( qureg, blockStartInd + blockSize, blockSize);
770  }
771  } else {
772  long long int dubBlockInd;
773 # ifdef _OPENMP
774 # pragma omp parallel for schedule (static) private (blockStartInd)
775 # endif
776  for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
777  blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
778  zeroSomeAmps( qureg, blockStartInd, blockSize);
779  normaliseSomeAmps(qureg, norm, blockStartInd + blockSize, blockSize); // |1><1|
780  }
781  }
782 }
783 
785 void densmatr_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outcome, qreal totalStateProb) {
786 
787  // only (global) indices (as bit sequence): '* outcome *(n+q) outcome *q are spared
788  // where n = measureQubit, q = qureg.numQubitsRepresented.
789  // We can thus step in blocks of 2^q+n, killing every second, and inside the others,
790  // stepping in sub-blocks of 2^q, killing every second.
791  // When outcome=1, we offset the start of these blocks by their size.
792  long long int innerBlockSize = (1LL << measureQubit);
793  long long int outerBlockSize = (1LL << (measureQubit + qureg.numQubitsRepresented));
794 
795  // Because there are 2^a number of nodes(/chunks), each node will contain 2^b number of blocks,
796  // or each block will span 2^c number of nodes. Similarly for the innerblocks.
797  long long int locNumAmps = qureg.numAmpsPerChunk;
798  long long int globalStartInd = qureg.chunkId * locNumAmps;
799  int innerBit = extractBit(measureQubit, globalStartInd);
800  int outerBit = extractBit(measureQubit + qureg.numQubitsRepresented, globalStartInd);
801 
802  // If this chunk's amps are entirely inside an outer block
803  if (locNumAmps <= outerBlockSize) {
804 
805  // if this is an undesired outer block, kill all elems
806  if (outerBit != outcome)
807  return zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
808 
809  // otherwise, if this is a desired outer block, and also entirely an inner block
810  if (locNumAmps <= innerBlockSize) {
811 
812  // and that inner block is undesired, kill all elems
813  if (innerBit != outcome)
814  return zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
815  // otherwise normalise all elems
816  else
817  return normaliseSomeAmps(qureg, totalStateProb, 0, qureg.numAmpsPerChunk);
818  }
819 
820  // otherwise this is a desired outer block which contains 2^a inner blocks; kill/renorm every second inner block
822  qureg, totalStateProb, innerBit==outcome, 0, qureg.numAmpsPerChunk, innerBlockSize);
823  }
824 
825  // Otherwise, this chunk's amps contain multiple outer blocks (and hence multiple inner blocks)
826  long long int numOuterDoubleBlocks = locNumAmps / (2*outerBlockSize);
827  long long int firstBlockInd;
828 
829  // alternate norming* and zeroing the outer blocks (with order based on the desired outcome)
830  // These loops aren't parallelised, since they could have 1 or 2 iterations and will prevent
831  // inner parallelisation
832  if (outerBit == outcome) {
833 
834  for (long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
835  firstBlockInd = outerDubBlockInd*2*outerBlockSize;
836 
837  // *norm only the desired inner blocks in the desired outer block
839  qureg, totalStateProb, innerBit==outcome,
840  firstBlockInd, outerBlockSize, innerBlockSize);
841 
842  // zero the undesired outer block
843  zeroSomeAmps(qureg, firstBlockInd + outerBlockSize, outerBlockSize);
844  }
845 
846  } else {
847 
848  for (long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
849  firstBlockInd = outerDubBlockInd*2*outerBlockSize;
850 
851  // same thing but undesired outer blocks come first
852  zeroSomeAmps(qureg, firstBlockInd, outerBlockSize);
854  qureg, totalStateProb, innerBit==outcome,
855  firstBlockInd + outerBlockSize, outerBlockSize, innerBlockSize);
856  }
857  }
858 
859 }
860 
862 
863  /* sum of qureg^2, which is sum_i |qureg[i]|^2 */
864  long long int index;
865  long long int numAmps = qureg.numAmpsPerChunk;
866 
867  qreal trace = 0;
868  qreal *vecRe = qureg.stateVec.real;
869  qreal *vecIm = qureg.stateVec.imag;
870 
871 # ifdef _OPENMP
872 # pragma omp parallel \
873  shared (vecRe, vecIm, numAmps) \
874  private (index) \
875  reduction ( +:trace )
876 # endif
877  {
878 # ifdef _OPENMP
879 # pragma omp for schedule (static)
880 # endif
881  for (index=0LL; index<numAmps; index++) {
882 
883  trace += vecRe[index]*vecRe[index] + vecIm[index]*vecIm[index];
884  }
885  }
886 
887  return trace;
888 }
889 
890 void densmatr_mixDensityMatrix(Qureg combineQureg, qreal otherProb, Qureg otherQureg) {
891 
892  /* corresponding amplitudes live on the same node (same dimensions) */
893 
894  // unpack vars for OpenMP
895  qreal* combineVecRe = combineQureg.stateVec.real;
896  qreal* combineVecIm = combineQureg.stateVec.imag;
897  qreal* otherVecRe = otherQureg.stateVec.real;
898  qreal* otherVecIm = otherQureg.stateVec.imag;
899  long long int numAmps = combineQureg.numAmpsPerChunk;
900  long long int index;
901 
902 # ifdef _OPENMP
903 # pragma omp parallel \
904  default (none) \
905  shared (combineVecRe,combineVecIm,otherVecRe,otherVecIm, otherProb, numAmps) \
906  private (index)
907 # endif
908  {
909 # ifdef _OPENMP
910 # pragma omp for schedule (static)
911 # endif
912  for (index=0; index < numAmps; index++) {
913  combineVecRe[index] *= 1-otherProb;
914  combineVecIm[index] *= 1-otherProb;
915 
916  combineVecRe[index] += otherProb * otherVecRe[index];
917  combineVecIm[index] += otherProb * otherVecIm[index];
918  }
919  }
920 }
921 
924 
925  long long int index;
926  long long int numAmps = a.numAmpsPerChunk;
927 
928  qreal *aRe = a.stateVec.real;
929  qreal *aIm = a.stateVec.imag;
930  qreal *bRe = b.stateVec.real;
931  qreal *bIm = b.stateVec.imag;
932 
933  qreal trace = 0;
934  qreal difRe, difIm;
935 
936 # ifdef _OPENMP
937 # pragma omp parallel \
938  shared (aRe,aIm, bRe,bIm, numAmps) \
939  private (index,difRe,difIm) \
940  reduction ( +:trace )
941 # endif
942  {
943 # ifdef _OPENMP
944 # pragma omp for schedule (static)
945 # endif
946  for (index=0LL; index<numAmps; index++) {
947 
948  difRe = aRe[index] - bRe[index];
949  difIm = aIm[index] - bIm[index];
950  trace += difRe*difRe + difIm*difIm;
951  }
952  }
953 
954  return trace;
955 }
956 
959 
960  long long int index;
961  long long int numAmps = a.numAmpsPerChunk;
962 
963  qreal *aRe = a.stateVec.real;
964  qreal *aIm = a.stateVec.imag;
965  qreal *bRe = b.stateVec.real;
966  qreal *bIm = b.stateVec.imag;
967 
968  qreal trace = 0;
969 
970 # ifdef _OPENMP
971 # pragma omp parallel \
972  shared (aRe,aIm, bRe,bIm, numAmps) \
973  private (index) \
974  reduction ( +:trace )
975 # endif
976  {
977 # ifdef _OPENMP
978 # pragma omp for schedule (static)
979 # endif
980  for (index=0LL; index<numAmps; index++) {
981  trace += aRe[index]*bRe[index] + aIm[index]*bIm[index];
982  }
983  }
984 
985  return trace;
986 }
987 
988 
991 
992  /* Here, elements of pureState are not accessed (instead grabbed from qureg.pair).
993  * We only consult the attributes.
994  *
995  * qureg is a density matrix, and pureState is a statevector.
996  * Every node contains as many columns of qureg as amps by pureState.
997  * (each node contains an integer, exponent-of-2 number of whole columns of qureg)
998  * Ergo, this node contains columns:
999  * qureg.chunkID * pureState.numAmpsPerChunk to
1000  * (qureg.chunkID + 1) * pureState.numAmpsPerChunk
1001  *
1002  * The first pureState.numAmpsTotal elements of qureg.pairStateVec are the
1003  * entire pureState state-vector
1004  */
1005 
1006  // unpack everything for OPENMP
1007  qreal* vecRe = qureg.pairStateVec.real;
1008  qreal* vecIm = qureg.pairStateVec.imag;
1009  qreal* densRe = qureg.stateVec.real;
1010  qreal* densIm = qureg.stateVec.imag;
1011 
1012  int row, col;
1013  int dim = (int) pureState.numAmpsTotal;
1014  int colsPerNode = (int) pureState.numAmpsPerChunk;
1015  // using only int, because density matrix has squared as many amps so its
1016  // iteration would be impossible if the pureStates numAmpsTotal didn't fit into int
1017 
1018  // starting GLOBAL column index of the qureg columns on this node
1019  int startCol = (int) (qureg.chunkId * pureState.numAmpsPerChunk);
1020 
1021  qreal densElemRe, densElemIm;
1022  qreal prefacRe, prefacIm;
1023  qreal rowSumRe, rowSumIm;
1024  qreal vecElemRe, vecElemIm;
1025 
1026  // quantity computed by this node
1027  qreal globalSumRe = 0; // imag-component is assumed zero
1028 
1029 # ifdef _OPENMP
1030 # pragma omp parallel \
1031  shared (vecRe,vecIm,densRe,densIm, dim,colsPerNode,startCol) \
1032  private (row,col, prefacRe,prefacIm, rowSumRe,rowSumIm, densElemRe,densElemIm, vecElemRe,vecElemIm) \
1033  reduction ( +:globalSumRe )
1034 # endif
1035  {
1036 # ifdef _OPENMP
1037 # pragma omp for schedule (static)
1038 # endif
1039  // indices of my GLOBAL row
1040  for (row=0; row < dim; row++) {
1041 
1042  // single element of conj(pureState)
1043  prefacRe = vecRe[row];
1044  prefacIm = - vecIm[row];
1045 
1046  rowSumRe = 0;
1047  rowSumIm = 0;
1048 
1049  // indices of my LOCAL column
1050  for (col=0; col < colsPerNode; col++) {
1051 
1052  // my local density element
1053  densElemRe = densRe[row + dim*col];
1054  densElemIm = densIm[row + dim*col];
1055 
1056  // state-vector element
1057  vecElemRe = vecRe[startCol + col];
1058  vecElemIm = vecIm[startCol + col];
1059 
1060  rowSumRe += densElemRe*vecElemRe - densElemIm*vecElemIm;
1061  rowSumIm += densElemRe*vecElemIm + densElemIm*vecElemRe;
1062  }
1063 
1064  globalSumRe += rowSumRe*prefacRe - rowSumIm*prefacIm;
1065  }
1066  }
1067 
1068  return globalSumRe;
1069 }
1070 
1072 
1073  qreal innerProdReal = 0;
1074  qreal innerProdImag = 0;
1075 
1076  long long int index;
1077  long long int numAmps = bra.numAmpsPerChunk;
1078  qreal *braVecReal = bra.stateVec.real;
1079  qreal *braVecImag = bra.stateVec.imag;
1080  qreal *ketVecReal = ket.stateVec.real;
1081  qreal *ketVecImag = ket.stateVec.imag;
1082 
1083  qreal braRe, braIm, ketRe, ketIm;
1084 
1085 # ifdef _OPENMP
1086 # pragma omp parallel \
1087  shared (braVecReal, braVecImag, ketVecReal, ketVecImag, numAmps) \
1088  private (index, braRe, braIm, ketRe, ketIm) \
1089  reduction ( +:innerProdReal, innerProdImag )
1090 # endif
1091  {
1092 # ifdef _OPENMP
1093 # pragma omp for schedule (static)
1094 # endif
1095  for (index=0; index < numAmps; index++) {
1096  braRe = braVecReal[index];
1097  braIm = braVecImag[index];
1098  ketRe = ketVecReal[index];
1099  ketIm = ketVecImag[index];
1100 
1101  // conj(bra_i) * ket_i
1102  innerProdReal += braRe*ketRe + braIm*ketIm;
1103  innerProdImag += braRe*ketIm - braIm*ketRe;
1104  }
1105  }
1106 
1107  Complex innerProd;
1108  innerProd.real = innerProdReal;
1109  innerProd.imag = innerProdImag;
1110  return innerProd;
1111 }
1112 
1113 
1114 
1115 void densmatr_initClassicalState (Qureg qureg, long long int stateInd)
1116 {
1117  // dimension of the state vector
1118  long long int densityNumElems = qureg.numAmpsPerChunk;
1119 
1120  // Can't use qureg->stateVec as a private OMP var
1121  qreal *densityReal = qureg.stateVec.real;
1122  qreal *densityImag = qureg.stateVec.imag;
1123 
1124  // initialise the state to all zeros
1125  long long int index;
1126 # ifdef _OPENMP
1127 # pragma omp parallel \
1128  default (none) \
1129  shared (densityNumElems, densityReal, densityImag) \
1130  private (index)
1131 # endif
1132  {
1133 # ifdef _OPENMP
1134 # pragma omp for schedule (static)
1135 # endif
1136  for (index=0; index<densityNumElems; index++) {
1137  densityReal[index] = 0.0;
1138  densityImag[index] = 0.0;
1139  }
1140  }
1141 
1142  // index of the single density matrix elem to set non-zero
1143  long long int densityDim = 1LL << qureg.numQubitsRepresented;
1144  long long int densityInd = (densityDim + 1)*stateInd;
1145 
1146  // give the specified classical state prob 1
1147  if (qureg.chunkId == densityInd / densityNumElems){
1148  densityReal[densityInd % densityNumElems] = 1.0;
1149  densityImag[densityInd % densityNumElems] = 0.0;
1150  }
1151 }
1152 
1153 
1155 {
1156  // |+><+| = sum_i 1/sqrt(2^N) |i> 1/sqrt(2^N) <j| = sum_ij 1/2^N |i><j|
1157  long long int dim = (1LL << qureg.numQubitsRepresented);
1158  qreal probFactor = 1.0/((qreal) dim);
1159 
1160  // Can't use qureg->stateVec as a private OMP var
1161  qreal *densityReal = qureg.stateVec.real;
1162  qreal *densityImag = qureg.stateVec.imag;
1163 
1164  long long int index;
1165  long long int chunkSize = qureg.numAmpsPerChunk;
1166  // initialise the state to |+++..+++> = 1/normFactor {1, 1, 1, ...}
1167 # ifdef _OPENMP
1168 # pragma omp parallel \
1169  default (none) \
1170  shared (chunkSize, densityReal, densityImag, probFactor) \
1171  private (index)
1172 # endif
1173  {
1174 # ifdef _OPENMP
1175 # pragma omp for schedule (static)
1176 # endif
1177  for (index=0; index<chunkSize; index++) {
1178  densityReal[index] = probFactor;
1179  densityImag[index] = 0.0;
1180  }
1181  }
1182 }
1183 
1184 void densmatr_initPureStateLocal(Qureg targetQureg, Qureg copyQureg) {
1185 
1186  /* copyQureg amps aren't explicitly used - they're accessed through targetQureg.pair,
1187  * which contains the full pure statevector.
1188  * targetQureg has as many columns on node as copyQureg has amps
1189  */
1190 
1191  long long int colOffset = targetQureg.chunkId * copyQureg.numAmpsPerChunk;
1192  long long int colsPerNode = copyQureg.numAmpsPerChunk;
1193  long long int rowsPerNode = copyQureg.numAmpsTotal;
1194 
1195  // unpack vars for OpenMP
1196  qreal* vecRe = targetQureg.pairStateVec.real;
1197  qreal* vecIm = targetQureg.pairStateVec.imag;
1198  qreal* densRe = targetQureg.stateVec.real;
1199  qreal* densIm = targetQureg.stateVec.imag;
1200 
1201  long long int col, row, index;
1202 
1203  // a_i conj(a_j) |i><j|
1204  qreal ketRe, ketIm, braRe, braIm;
1205 
1206 # ifdef _OPENMP
1207 # pragma omp parallel \
1208  default (none) \
1209  shared (colOffset, colsPerNode,rowsPerNode, vecRe,vecIm,densRe,densIm) \
1210  private (col,row, ketRe,ketIm,braRe,braIm, index)
1211 # endif
1212  {
1213 # ifdef _OPENMP
1214 # pragma omp for schedule (static)
1215 # endif
1216  // local column
1217  for (col=0; col < colsPerNode; col++) {
1218 
1219  // global row
1220  for (row=0; row < rowsPerNode; row++) {
1221 
1222  // get pure state amps
1223  ketRe = vecRe[row];
1224  ketIm = vecIm[row];
1225  braRe = vecRe[col + colOffset];
1226  braIm = - vecIm[col + colOffset]; // minus for conjugation
1227 
1228  // update density matrix
1229  index = row + col*rowsPerNode; // local ind
1230  densRe[index] = ketRe*braRe - ketIm*braIm;
1231  densIm[index] = ketRe*braIm + ketIm*braRe;
1232  }
1233  }
1234  }
1235 }
1236 
1237 void statevec_setAmps(Qureg qureg, long long int startInd, qreal* reals, qreal* imags, long long int numAmps) {
1238 
1239  /* this is actually distributed, since the user's code runs on every node */
1240 
1241  // local start/end indices of the given amplitudes, assuming they fit in this chunk
1242  // these may be negative or above qureg.numAmpsPerChunk
1243  long long int localStartInd = startInd - qureg.chunkId*qureg.numAmpsPerChunk;
1244  long long int localEndInd = localStartInd + numAmps; // exclusive
1245 
1246  // add this to a local index to get corresponding elem in reals & imags
1247  long long int offset = qureg.chunkId*qureg.numAmpsPerChunk - startInd;
1248 
1249  // restrict these indices to fit into this chunk
1250  if (localStartInd < 0)
1251  localStartInd = 0;
1252  if (localEndInd > qureg.numAmpsPerChunk)
1253  localEndInd = qureg.numAmpsPerChunk;
1254  // they may now be out of order = no iterations
1255 
1256  // unpacking OpenMP vars
1257  long long int index;
1258  qreal* vecRe = qureg.stateVec.real;
1259  qreal* vecIm = qureg.stateVec.imag;
1260 
1261 # ifdef _OPENMP
1262 # pragma omp parallel \
1263  default (none) \
1264  shared (localStartInd,localEndInd, vecRe,vecIm, reals,imags, offset) \
1265  private (index)
1266 # endif
1267  {
1268 # ifdef _OPENMP
1269 # pragma omp for schedule (static)
1270 # endif
1271  // iterate these local inds - this might involve no iterations
1272  for (index=localStartInd; index < localEndInd; index++) {
1273  vecRe[index] = reals[index + offset];
1274  vecIm[index] = imags[index + offset];
1275  }
1276  }
1277 }
1278 
1279 void statevec_createQureg(Qureg *qureg, int numQubits, QuESTEnv env)
1280 {
1281  long long int numAmps = 1LL << numQubits;
1282  long long int numAmpsPerRank = numAmps/env.numRanks;
1283 
1284  if (numAmpsPerRank > SIZE_MAX) {
1285  printf("Could not allocate memory (cannot fit numAmps into size_t)!");
1286  exit (EXIT_FAILURE);
1287  }
1288 
1289  size_t arrSize = (size_t) (numAmpsPerRank * sizeof(*(qureg->stateVec.real)));
1290  qureg->stateVec.real = malloc(arrSize);
1291  qureg->stateVec.imag = malloc(arrSize);
1292  if (env.numRanks>1){
1293  qureg->pairStateVec.real = malloc(arrSize);
1294  qureg->pairStateVec.imag = malloc(arrSize);
1295  }
1296 
1297  if ( (!(qureg->stateVec.real) || !(qureg->stateVec.imag))
1298  && numAmpsPerRank ) {
1299  printf("Could not allocate memory!");
1300  exit (EXIT_FAILURE);
1301  }
1302 
1303  if ( env.numRanks>1 && (!(qureg->pairStateVec.real) || !(qureg->pairStateVec.imag))
1304  && numAmpsPerRank ) {
1305  printf("Could not allocate memory!");
1306  exit (EXIT_FAILURE);
1307  }
1308 
1309  qureg->numQubitsInStateVec = numQubits;
1310  qureg->numAmpsTotal = numAmps;
1311  qureg->numAmpsPerChunk = numAmpsPerRank;
1312  qureg->chunkId = env.rank;
1313  qureg->numChunks = env.numRanks;
1314  qureg->isDensityMatrix = 0;
1315 }
1316 
1318 
1319  qureg.numQubitsInStateVec = 0;
1320  qureg.numAmpsTotal = 0;
1321  qureg.numAmpsPerChunk = 0;
1322 
1323  free(qureg.stateVec.real);
1324  free(qureg.stateVec.imag);
1325  if (env.numRanks>1){
1326  free(qureg.pairStateVec.real);
1327  free(qureg.pairStateVec.imag);
1328  }
1329  qureg.stateVec.real = NULL;
1330  qureg.stateVec.imag = NULL;
1331  qureg.pairStateVec.real = NULL;
1332  qureg.pairStateVec.imag = NULL;
1333 }
1334 
1336 
1337  // the 2^numQubits values will be evenly split between the env.numRanks nodes
1338  DiagonalOp op;
1339  op.numQubits = numQubits;
1340  op.numElemsPerChunk = (1LL << numQubits) / env.numRanks;
1341  op.chunkId = env.rank;
1342  op.numChunks = env.numRanks;
1343 
1344  // allocate CPU memory (initialised to zero)
1345  op.real = (qreal*) calloc(op.numElemsPerChunk, sizeof(qreal));
1346  op.imag = (qreal*) calloc(op.numElemsPerChunk, sizeof(qreal));
1347 
1348  // check cpu memory allocation was successful
1349  if ( !op.real || !op.imag ) {
1350  printf("Could not allocate memory!\n");
1351  exit(EXIT_FAILURE);
1352  }
1353 
1354  return op;
1355 }
1356 
1358  free(op.real);
1359  free(op.imag);
1360 }
1361 
1363  // nothing to do on CPU
1364 }
1365 
1366 void statevec_reportStateToScreen(Qureg qureg, QuESTEnv env, int reportRank){
1367  long long int index;
1368  int rank;
1369  if (qureg.numQubitsInStateVec<=5){
1370  for (rank=0; rank<qureg.numChunks; rank++){
1371  if (qureg.chunkId==rank){
1372  if (reportRank) {
1373  printf("Reporting state from rank %d [\n", qureg.chunkId);
1374  printf("real, imag\n");
1375  } else if (rank==0) {
1376  printf("Reporting state [\n");
1377  printf("real, imag\n");
1378  }
1379 
1380  for(index=0; index<qureg.numAmpsPerChunk; index++){
1381  //printf(REAL_STRING_FORMAT ", " REAL_STRING_FORMAT "\n", qureg.pairStateVec.real[index], qureg.pairStateVec.imag[index]);
1382  printf(REAL_STRING_FORMAT ", " REAL_STRING_FORMAT "\n", qureg.stateVec.real[index], qureg.stateVec.imag[index]);
1383  }
1384  if (reportRank || rank==qureg.numChunks-1) printf("]\n");
1385  }
1386  syncQuESTEnv(env);
1387  }
1388  } else printf("Error: reportStateToScreen will not print output for systems of more than 5 qubits.\n");
1389 }
1390 void statevec_getEnvironmentString(QuESTEnv env, Qureg qureg, char str[200]){
1391  int numThreads=1;
1392 # ifdef _OPENMP
1393  numThreads=omp_get_max_threads();
1394 # endif
1395  sprintf(str, "%dqubits_CPU_%dranksx%dthreads", qureg.numQubitsInStateVec, env.numRanks, numThreads);
1396 }
1397 
1399 {
1400  long long int stateVecSize;
1401  long long int index;
1402 
1403  // dimension of the state vector
1404  stateVecSize = qureg.numAmpsPerChunk;
1405 
1406  // Can't use qureg->stateVec as a private OMP var
1407  qreal *stateVecReal = qureg.stateVec.real;
1408  qreal *stateVecImag = qureg.stateVec.imag;
1409 
1410  // initialise the state-vector to all-zeroes
1411 # ifdef _OPENMP
1412 # pragma omp parallel \
1413  default (none) \
1414  shared (stateVecSize, stateVecReal, stateVecImag) \
1415  private (index)
1416 # endif
1417  {
1418 # ifdef _OPENMP
1419 # pragma omp for schedule (static)
1420 # endif
1421  for (index=0; index<stateVecSize; index++) {
1422  stateVecReal[index] = 0.0;
1423  stateVecImag[index] = 0.0;
1424  }
1425  }
1426 }
1427 
1429 {
1430  statevec_initBlankState(qureg);
1431  if (qureg.chunkId==0){
1432  // zero state |0000..0000> has probability 1
1433  qureg.stateVec.real[0] = 1.0;
1434  qureg.stateVec.imag[0] = 0.0;
1435  }
1436 }
1437 
1439 {
1440  long long int chunkSize, stateVecSize;
1441  long long int index;
1442 
1443  // dimension of the state vector
1444  chunkSize = qureg.numAmpsPerChunk;
1445  stateVecSize = chunkSize*qureg.numChunks;
1446  qreal normFactor = 1.0/sqrt((qreal)stateVecSize);
1447 
1448  // Can't use qureg->stateVec as a private OMP var
1449  qreal *stateVecReal = qureg.stateVec.real;
1450  qreal *stateVecImag = qureg.stateVec.imag;
1451 
1452  // initialise the state to |+++..+++> = 1/normFactor {1, 1, 1, ...}
1453 # ifdef _OPENMP
1454 # pragma omp parallel \
1455  default (none) \
1456  shared (chunkSize, stateVecReal, stateVecImag, normFactor) \
1457  private (index)
1458 # endif
1459  {
1460 # ifdef _OPENMP
1461 # pragma omp for schedule (static)
1462 # endif
1463  for (index=0; index<chunkSize; index++) {
1464  stateVecReal[index] = normFactor;
1465  stateVecImag[index] = 0.0;
1466  }
1467  }
1468 }
1469 
1470 void statevec_initClassicalState (Qureg qureg, long long int stateInd)
1471 {
1472  long long int stateVecSize;
1473  long long int index;
1474 
1475  // dimension of the state vector
1476  stateVecSize = qureg.numAmpsPerChunk;
1477 
1478  // Can't use qureg->stateVec as a private OMP var
1479  qreal *stateVecReal = qureg.stateVec.real;
1480  qreal *stateVecImag = qureg.stateVec.imag;
1481 
1482  // initialise the state to vector to all zeros
1483 # ifdef _OPENMP
1484 # pragma omp parallel \
1485  default (none) \
1486  shared (stateVecSize, stateVecReal, stateVecImag) \
1487  private (index)
1488 # endif
1489  {
1490 # ifdef _OPENMP
1491 # pragma omp for schedule (static)
1492 # endif
1493  for (index=0; index<stateVecSize; index++) {
1494  stateVecReal[index] = 0.0;
1495  stateVecImag[index] = 0.0;
1496  }
1497  }
1498 
1499  // give the specified classical state prob 1
1500  if (qureg.chunkId == stateInd/stateVecSize){
1501  stateVecReal[stateInd % stateVecSize] = 1.0;
1502  stateVecImag[stateInd % stateVecSize] = 0.0;
1503  }
1504 }
1505 
1506 void statevec_cloneQureg(Qureg targetQureg, Qureg copyQureg) {
1507 
1508  // registers are equal sized, so nodes hold the same state-vector partitions
1509  long long int stateVecSize;
1510  long long int index;
1511 
1512  // dimension of the state vector
1513  stateVecSize = targetQureg.numAmpsPerChunk;
1514 
1515  // Can't use qureg->stateVec as a private OMP var
1516  qreal *targetStateVecReal = targetQureg.stateVec.real;
1517  qreal *targetStateVecImag = targetQureg.stateVec.imag;
1518  qreal *copyStateVecReal = copyQureg.stateVec.real;
1519  qreal *copyStateVecImag = copyQureg.stateVec.imag;
1520 
1521  // initialise the state to |0000..0000>
1522 # ifdef _OPENMP
1523 # pragma omp parallel \
1524  default (none) \
1525  shared (stateVecSize, targetStateVecReal, targetStateVecImag, copyStateVecReal, copyStateVecImag) \
1526  private (index)
1527 # endif
1528  {
1529 # ifdef _OPENMP
1530 # pragma omp for schedule (static)
1531 # endif
1532  for (index=0; index<stateVecSize; index++) {
1533  targetStateVecReal[index] = copyStateVecReal[index];
1534  targetStateVecImag[index] = copyStateVecImag[index];
1535  }
1536  }
1537 }
1538 
1545 void statevec_initStateOfSingleQubit(Qureg *qureg, int qubitId, int outcome)
1546 {
1547  long long int chunkSize, stateVecSize;
1548  long long int index;
1549  int bit;
1550  long long int chunkId=qureg->chunkId;
1551 
1552  // dimension of the state vector
1553  chunkSize = qureg->numAmpsPerChunk;
1554  stateVecSize = chunkSize*qureg->numChunks;
1555  qreal normFactor = 1.0/sqrt((qreal)stateVecSize/2.0);
1556 
1557  // Can't use qureg->stateVec as a private OMP var
1558  qreal *stateVecReal = qureg->stateVec.real;
1559  qreal *stateVecImag = qureg->stateVec.imag;
1560 
1561  // initialise the state to |0000..0000>
1562 # ifdef _OPENMP
1563 # pragma omp parallel \
1564  default (none) \
1565  shared (chunkSize, stateVecReal, stateVecImag, normFactor, qubitId, outcome, chunkId) \
1566  private (index, bit)
1567 # endif
1568  {
1569 # ifdef _OPENMP
1570 # pragma omp for schedule (static)
1571 # endif
1572  for (index=0; index<chunkSize; index++) {
1573  bit = extractBit(qubitId, index+chunkId*chunkSize);
1574  if (bit==outcome) {
1575  stateVecReal[index] = normFactor;
1576  stateVecImag[index] = 0.0;
1577  } else {
1578  stateVecReal[index] = 0.0;
1579  stateVecImag[index] = 0.0;
1580  }
1581  }
1582  }
1583 }
1584 
1585 
1592 {
1593  long long int chunkSize;
1594  long long int index;
1595  long long int indexOffset;
1596 
1597  // dimension of the state vector
1598  chunkSize = qureg.numAmpsPerChunk;
1599 
1600  // Can't use qureg->stateVec as a private OMP var
1601  qreal *stateVecReal = qureg.stateVec.real;
1602  qreal *stateVecImag = qureg.stateVec.imag;
1603 
1604  indexOffset = chunkSize * qureg.chunkId;
1605 
1606  // initialise the state to |0000..0000>
1607 # ifdef _OPENMP
1608 # pragma omp parallel \
1609  default (none) \
1610  shared (chunkSize, stateVecReal, stateVecImag, indexOffset) \
1611  private (index)
1612 # endif
1613  {
1614 # ifdef _OPENMP
1615 # pragma omp for schedule (static)
1616 # endif
1617  for (index=0; index<chunkSize; index++) {
1618  stateVecReal[index] = ((indexOffset + index)*2.0)/10.0;
1619  stateVecImag[index] = ((indexOffset + index)*2.0+1.0)/10.0;
1620  }
1621  }
1622 }
1623 
1624 // returns 1 if successful, else 0
1625 int statevec_initStateFromSingleFile(Qureg *qureg, char filename[200], QuESTEnv env){
1626  long long int chunkSize, stateVecSize;
1627  long long int indexInChunk, totalIndex;
1628 
1629  chunkSize = qureg->numAmpsPerChunk;
1630  stateVecSize = chunkSize*qureg->numChunks;
1631 
1632  qreal *stateVecReal = qureg->stateVec.real;
1633  qreal *stateVecImag = qureg->stateVec.imag;
1634 
1635  FILE *fp;
1636  char line[200];
1637 
1638  for (int rank=0; rank<(qureg->numChunks); rank++){
1639  if (rank==qureg->chunkId){
1640  fp = fopen(filename, "r");
1641 
1642  // indicate file open failure
1643  if (fp == NULL)
1644  return 0;
1645 
1646  indexInChunk = 0; totalIndex = 0;
1647  while (fgets(line, sizeof(char)*200, fp) != NULL && totalIndex<stateVecSize){
1648  if (line[0]!='#'){
1649  int chunkId = (int) (totalIndex/chunkSize);
1650  if (chunkId==qureg->chunkId){
1651  # if QuEST_PREC==1
1652  sscanf(line, "%f, %f", &(stateVecReal[indexInChunk]),
1653  &(stateVecImag[indexInChunk]));
1654  # elif QuEST_PREC==2
1655  sscanf(line, "%lf, %lf", &(stateVecReal[indexInChunk]),
1656  &(stateVecImag[indexInChunk]));
1657  # elif QuEST_PREC==4
1658  sscanf(line, "%Lf, %Lf", &(stateVecReal[indexInChunk]),
1659  &(stateVecImag[indexInChunk]));
1660  # endif
1661  indexInChunk += 1;
1662  }
1663  totalIndex += 1;
1664  }
1665  }
1666  fclose(fp);
1667  }
1668  syncQuESTEnv(env);
1669  }
1670 
1671  // indicate success
1672  return 1;
1673 }
1674 
1675 int statevec_compareStates(Qureg mq1, Qureg mq2, qreal precision){
1676  qreal diff;
1677  long long int chunkSize = mq1.numAmpsPerChunk;
1678 
1679  for (long long int i=0; i<chunkSize; i++){
1680  diff = absReal(mq1.stateVec.real[i] - mq2.stateVec.real[i]);
1681  if (diff>precision) return 0;
1682  diff = absReal(mq1.stateVec.imag[i] - mq2.stateVec.imag[i]);
1683  if (diff>precision) return 0;
1684  }
1685  return 1;
1686 }
1687 
1688 void statevec_compactUnitaryLocal (Qureg qureg, int targetQubit, Complex alpha, Complex beta)
1689 {
1690  long long int sizeBlock, sizeHalfBlock;
1691  long long int thisBlock, // current block
1692  indexUp,indexLo; // current index and corresponding index in lower half block
1693 
1694  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
1695  long long int thisTask;
1696  long long int numTasks=qureg.numAmpsPerChunk>>1;
1697 
1698  // set dimensions
1699  sizeHalfBlock = 1LL << targetQubit;
1700  sizeBlock = 2LL * sizeHalfBlock;
1701 
1702  // Can't use qureg.stateVec as a private OMP var
1703  qreal *stateVecReal = qureg.stateVec.real;
1704  qreal *stateVecImag = qureg.stateVec.imag;
1705  qreal alphaImag=alpha.imag, alphaReal=alpha.real;
1706  qreal betaImag=beta.imag, betaReal=beta.real;
1707 
1708 # ifdef _OPENMP
1709 # pragma omp parallel \
1710  default (none) \
1711  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, numTasks) \
1712  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
1713 # endif
1714  {
1715 # ifdef _OPENMP
1716 # pragma omp for schedule (static)
1717 # endif
1718  for (thisTask=0; thisTask<numTasks; thisTask++) {
1719 
1720  thisBlock = thisTask / sizeHalfBlock;
1721  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
1722  indexLo = indexUp + sizeHalfBlock;
1723 
1724  // store current state vector values in temp variables
1725  stateRealUp = stateVecReal[indexUp];
1726  stateImagUp = stateVecImag[indexUp];
1727 
1728  stateRealLo = stateVecReal[indexLo];
1729  stateImagLo = stateVecImag[indexLo];
1730 
1731  // state[indexUp] = alpha * state[indexUp] - conj(beta) * state[indexLo]
1732  stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
1733  - betaReal*stateRealLo - betaImag*stateImagLo;
1734  stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
1735  - betaReal*stateImagLo + betaImag*stateRealLo;
1736 
1737  // state[indexLo] = beta * state[indexUp] + conj(alpha) * state[indexLo]
1738  stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
1739  + alphaReal*stateRealLo + alphaImag*stateImagLo;
1740  stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
1741  + alphaReal*stateImagLo - alphaImag*stateRealLo;
1742  }
1743  }
1744 
1745 }
1746 
1747 void statevec_multiControlledTwoQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u) {
1748 
1749  // can't use qureg.stateVec as a private OMP var
1750  qreal *reVec = qureg.stateVec.real;
1751  qreal *imVec = qureg.stateVec.imag;
1752 
1753  // the global (between all nodes) index of this node's start index
1754  long long int globalIndStart = qureg.chunkId*qureg.numAmpsPerChunk;
1755 
1756  long long int numTasks = qureg.numAmpsPerChunk >> 2; // each iteration updates 4 amplitudes
1757  long long int thisTask;
1758  long long int thisGlobalInd00;
1759  long long int ind00, ind01, ind10, ind11;
1760  qreal re00, re01, re10, re11;
1761  qreal im00, im01, im10, im11;
1762 
1763 # ifdef _OPENMP
1764 # pragma omp parallel \
1765  default (none) \
1766  shared (reVec,imVec,globalIndStart,numTasks,ctrlMask,u,q2,q1) \
1767  private (thisTask, thisGlobalInd00, ind00,ind01,ind10,ind11, re00,re01,re10,re11, im00,im01,im10,im11)
1768 # endif
1769  {
1770 # ifdef _OPENMP
1771 # pragma omp for schedule (static)
1772 # endif
1773  for (thisTask=0; thisTask<numTasks; thisTask++) {
1774 
1775  // determine ind00 of |..0..0..>
1776  ind00 = insertTwoZeroBits(thisTask, q1, q2);
1777 
1778  // skip amplitude if controls aren't in 1 state (overloaded for speed)
1779  thisGlobalInd00 = ind00 + globalIndStart;
1780  if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1781  continue;
1782 
1783  // inds of |..0..1..>, |..1..0..> and |..1..1..>
1784  ind01 = flipBit(ind00, q1);
1785  ind10 = flipBit(ind00, q2);
1786  ind11 = flipBit(ind01, q2);
1787 
1788  // extract statevec amplitudes
1789  re00 = reVec[ind00]; im00 = imVec[ind00];
1790  re01 = reVec[ind01]; im01 = imVec[ind01];
1791  re10 = reVec[ind10]; im10 = imVec[ind10];
1792  re11 = reVec[ind11]; im11 = imVec[ind11];
1793 
1794  // apply u * {amp00, amp01, amp10, amp11}
1795  reVec[ind00] =
1796  u.real[0][0]*re00 - u.imag[0][0]*im00 +
1797  u.real[0][1]*re01 - u.imag[0][1]*im01 +
1798  u.real[0][2]*re10 - u.imag[0][2]*im10 +
1799  u.real[0][3]*re11 - u.imag[0][3]*im11;
1800  imVec[ind00] =
1801  u.imag[0][0]*re00 + u.real[0][0]*im00 +
1802  u.imag[0][1]*re01 + u.real[0][1]*im01 +
1803  u.imag[0][2]*re10 + u.real[0][2]*im10 +
1804  u.imag[0][3]*re11 + u.real[0][3]*im11;
1805 
1806  reVec[ind01] =
1807  u.real[1][0]*re00 - u.imag[1][0]*im00 +
1808  u.real[1][1]*re01 - u.imag[1][1]*im01 +
1809  u.real[1][2]*re10 - u.imag[1][2]*im10 +
1810  u.real[1][3]*re11 - u.imag[1][3]*im11;
1811  imVec[ind01] =
1812  u.imag[1][0]*re00 + u.real[1][0]*im00 +
1813  u.imag[1][1]*re01 + u.real[1][1]*im01 +
1814  u.imag[1][2]*re10 + u.real[1][2]*im10 +
1815  u.imag[1][3]*re11 + u.real[1][3]*im11;
1816 
1817  reVec[ind10] =
1818  u.real[2][0]*re00 - u.imag[2][0]*im00 +
1819  u.real[2][1]*re01 - u.imag[2][1]*im01 +
1820  u.real[2][2]*re10 - u.imag[2][2]*im10 +
1821  u.real[2][3]*re11 - u.imag[2][3]*im11;
1822  imVec[ind10] =
1823  u.imag[2][0]*re00 + u.real[2][0]*im00 +
1824  u.imag[2][1]*re01 + u.real[2][1]*im01 +
1825  u.imag[2][2]*re10 + u.real[2][2]*im10 +
1826  u.imag[2][3]*re11 + u.real[2][3]*im11;
1827 
1828  reVec[ind11] =
1829  u.real[3][0]*re00 - u.imag[3][0]*im00 +
1830  u.real[3][1]*re01 - u.imag[3][1]*im01 +
1831  u.real[3][2]*re10 - u.imag[3][2]*im10 +
1832  u.real[3][3]*re11 - u.imag[3][3]*im11;
1833  imVec[ind11] =
1834  u.imag[3][0]*re00 + u.real[3][0]*im00 +
1835  u.imag[3][1]*re01 + u.real[3][1]*im01 +
1836  u.imag[3][2]*re10 + u.real[3][2]*im10 +
1837  u.imag[3][3]*re11 + u.real[3][3]*im11;
1838  }
1839  }
1840 }
1841 
/*
 * Three-way comparator for qsort() over arrays of int (ascending order).
 *
 * a, b: pointers to the two int elements being compared.
 * Returns a negative value when *a < *b, zero when they are equal and a
 * positive value when *a > *b.
 */
int qsortComp(const void *a, const void *b) {
    int lhs = *(const int*)a;
    int rhs = *(const int*)b;

    // compare instead of subtracting: lhs - rhs overflows (undefined behaviour)
    // when the operands have opposite signs and large magnitudes
    return (lhs > rhs) - (lhs < rhs);
}
1845 
1846 void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int* targs, int numTargs, ComplexMatrixN u)
1847 {
1848  // can't use qureg.stateVec as a private OMP var
1849  qreal *reVec = qureg.stateVec.real;
1850  qreal *imVec = qureg.stateVec.imag;
1851 
1852  long long int numTasks = qureg.numAmpsPerChunk >> numTargs; // kernel called on every 1 in 2^numTargs amplitudes
1853  long long int numTargAmps = 1 << u.numQubits; // num amps to be modified by each task
1854 
1855  // the global (between all nodes) index of this node's start index
1856  long long int globalIndStart = qureg.chunkId*qureg.numAmpsPerChunk;
1857 
1858  long long int thisTask;
1859  long long int thisInd00; // this thread's index of |..0..0..> (target qubits = 0)
1860  long long int thisGlobalInd00; // the global (between all nodes) index of this thread's |..0..0..> state
1861  long long int ind; // each thread's iteration of amplitudes to modify
1862  int i, t, r, c; // each thread's iteration of amps and targets
1863  qreal reElem, imElem; // each thread's iteration of u elements
1864 
1865  // each thread/task will record and modify numTargAmps amplitudes, privately
1866  // (of course, tasks eliminated by the ctrlMask won't edit their allocation)
1867  long long int ampInds[numTargAmps];
1868  qreal reAmps[numTargAmps];
1869  qreal imAmps[numTargAmps];
1870 
1871  // we need a sorted targets list to find thisInd00 for each task.
1872  // we can't modify targets, because the user-ordering of targets matters in u
1873  int sortedTargs[numTargs];
1874  for (int t=0; t < numTargs; t++)
1875  sortedTargs[t] = targs[t];
1876  qsort(sortedTargs, numTargs, sizeof(int), qsortComp);
1877 
1878 # ifdef _OPENMP
1879 # pragma omp parallel \
1880  default (none) \
1881  shared (reVec,imVec, numTasks,numTargAmps,globalIndStart, ctrlMask,targs,sortedTargs,u,numTargs) \
1882  private (thisTask,thisInd00,thisGlobalInd00,ind,i,t,r,c,reElem,imElem, ampInds,reAmps,imAmps)
1883 # endif
1884  {
1885 # ifdef _OPENMP
1886 # pragma omp for schedule (static)
1887 # endif
1888  for (thisTask=0; thisTask<numTasks; thisTask++) {
1889 
1890  // find this task's start index (where all targs are 0)
1891  thisInd00 = thisTask;
1892  for (t=0; t < numTargs; t++)
1893  thisInd00 = insertZeroBit(thisInd00, sortedTargs[t]);
1894 
1895  // this task only modifies amplitudes if control qubits are 1 for this state
1896  thisGlobalInd00 = thisInd00 + globalIndStart;
1897  if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1898  continue;
1899 
1900  // determine the indices and record values of this tasks's target amps
1901  for (i=0; i < numTargAmps; i++) {
1902 
1903  // get statevec index of current target qubit assignment
1904  ind = thisInd00;
1905  for (t=0; t < numTargs; t++)
1906  if (extractBit(t, i))
1907  ind = flipBit(ind, targs[t]);
1908 
1909  // update this tasks's private arrays
1910  ampInds[i] = ind;
1911  reAmps [i] = reVec[ind];
1912  imAmps [i] = imVec[ind];
1913  }
1914 
1915  // modify this tasks's target amplitudes
1916  for (r=0; r < numTargAmps; r++) {
1917  ind = ampInds[r];
1918  reVec[ind] = 0;
1919  imVec[ind] = 0;
1920 
1921  for (c=0; c < numTargAmps; c++) {
1922  reElem = u.real[r][c];
1923  imElem = u.imag[r][c];
1924  reVec[ind] += reAmps[c]*reElem - imAmps[c]*imElem;
1925  imVec[ind] += reAmps[c]*imElem + imAmps[c]*reElem;
1926  }
1927  }
1928  }
1929  }
1930 }
1931 
1932 void statevec_unitaryLocal(Qureg qureg, int targetQubit, ComplexMatrix2 u)
1933 {
1934  long long int sizeBlock, sizeHalfBlock;
1935  long long int thisBlock, // current block
1936  indexUp,indexLo; // current index and corresponding index in lower half block
1937 
1938  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
1939  long long int thisTask;
1940  long long int numTasks=qureg.numAmpsPerChunk>>1;
1941 
1942  // set dimensions
1943  sizeHalfBlock = 1LL << targetQubit;
1944  sizeBlock = 2LL * sizeHalfBlock;
1945 
1946  // Can't use qureg.stateVec as a private OMP var
1947  qreal *stateVecReal = qureg.stateVec.real;
1948  qreal *stateVecImag = qureg.stateVec.imag;
1949 
1950 # ifdef _OPENMP
1951 # pragma omp parallel \
1952  default (none) \
1953  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks) \
1954  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
1955 # endif
1956  {
1957 # ifdef _OPENMP
1958 # pragma omp for schedule (static)
1959 # endif
1960  for (thisTask=0; thisTask<numTasks; thisTask++) {
1961 
1962  thisBlock = thisTask / sizeHalfBlock;
1963  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
1964  indexLo = indexUp + sizeHalfBlock;
1965 
1966  // store current state vector values in temp variables
1967  stateRealUp = stateVecReal[indexUp];
1968  stateImagUp = stateVecImag[indexUp];
1969 
1970  stateRealLo = stateVecReal[indexLo];
1971  stateImagLo = stateVecImag[indexLo];
1972 
1973 
1974  // state[indexUp] = u00 * state[indexUp] + u01 * state[indexLo]
1975  stateVecReal[indexUp] = u.real[0][0]*stateRealUp - u.imag[0][0]*stateImagUp
1976  + u.real[0][1]*stateRealLo - u.imag[0][1]*stateImagLo;
1977  stateVecImag[indexUp] = u.real[0][0]*stateImagUp + u.imag[0][0]*stateRealUp
1978  + u.real[0][1]*stateImagLo + u.imag[0][1]*stateRealLo;
1979 
1980  // state[indexLo] = u10 * state[indexUp] + u11 * state[indexLo]
1981  stateVecReal[indexLo] = u.real[1][0]*stateRealUp - u.imag[1][0]*stateImagUp
1982  + u.real[1][1]*stateRealLo - u.imag[1][1]*stateImagLo;
1983  stateVecImag[indexLo] = u.real[1][0]*stateImagUp + u.imag[1][0]*stateRealUp
1984  + u.real[1][1]*stateImagLo + u.imag[1][1]*stateRealLo;
1985 
1986  }
1987  }
1988 }
1989 
// NOTE(review): the line holding this function's return type, name and first
// parameter(s) is absent from this listing; `qureg`, read below, is presumably
// declared on it.
//
// For every k in [0, qureg.numAmpsPerChunk) this stores
//     stateVecOut[k] = rot1 * stateVecUp[k] + conj(rot2) * stateVecLo[k]
// using complex arithmetic on the separate real/imag arrays.
2002  Complex rot1, Complex rot2,
2003  ComplexArray stateVecUp,
2004  ComplexArray stateVecLo,
2005  ComplexArray stateVecOut)
2006 {
2007 
2008  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2009  long long int thisTask;
2010  long long int numTasks=qureg.numAmpsPerChunk;
2011 
 // unpack the struct members into plain locals so they can be listed in the OMP clauses
2012  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2013  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2014  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2015  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2016  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2017 
2018 # ifdef _OPENMP
2019 # pragma omp parallel \
2020  default (none) \
2021  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2022  rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks) \
2023  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2024 # endif
2025  {
2026 # ifdef _OPENMP
2027 # pragma omp for schedule (static)
2028 # endif
2029  for (thisTask=0; thisTask<numTasks; thisTask++) {
2030  // store current state vector values in temp variables
2031  stateRealUp = stateVecRealUp[thisTask];
2032  stateImagUp = stateVecImagUp[thisTask];
2033 
2034  stateRealLo = stateVecRealLo[thisTask];
2035  stateImagLo = stateVecImagLo[thisTask];
2036 
2037  // out = rot1 * up + conj(rot2) * lo (real part, then imaginary part)
2038  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2039  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2040  }
2041  }
2042 }
2043 
// NOTE(review): the line holding this function's return type, name and first
// parameter(s) is absent from this listing; `qureg`, read below, is presumably
// declared on it.
//
// For every k in [0, qureg.numAmpsPerChunk) this stores
//     stateVecOut[k] = rot1 * stateVecUp[k] + rot2 * stateVecLo[k]
// using complex arithmetic on the separate real/imag arrays.
2057  Complex rot1, Complex rot2,
2058  ComplexArray stateVecUp,
2059  ComplexArray stateVecLo,
2060  ComplexArray stateVecOut)
2061 {
2062 
2063  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2064  long long int thisTask;
2065  long long int numTasks=qureg.numAmpsPerChunk;
2066 
 // unpack the struct members into plain locals so they can be listed in the OMP clauses
2067  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2068  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2069  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2070  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2071  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2072 
2073 
2074 # ifdef _OPENMP
2075 # pragma omp parallel \
2076  default (none) \
2077  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2078  rot1Real, rot1Imag, rot2Real, rot2Imag,numTasks) \
2079  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2080 # endif
2081  {
2082 # ifdef _OPENMP
2083 # pragma omp for schedule (static)
2084 # endif
2085  for (thisTask=0; thisTask<numTasks; thisTask++) {
2086  // store current state vector values in temp variables
2087  stateRealUp = stateVecRealUp[thisTask];
2088  stateImagUp = stateVecImagUp[thisTask];
2089 
2090  stateRealLo = stateVecRealLo[thisTask];
2091  stateImagLo = stateVecImagLo[thisTask];
2092 
 // out = rot1 * up + rot2 * lo (real part, then imaginary part)
2093  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2094  + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2095  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2096  + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2097  }
2098  }
2099 }
2100 
2101 void statevec_controlledCompactUnitaryLocal (Qureg qureg, int controlQubit, int targetQubit,
2102  Complex alpha, Complex beta)
2103 {
2104  long long int sizeBlock, sizeHalfBlock;
2105  long long int thisBlock, // current block
2106  indexUp,indexLo; // current index and corresponding index in lower half block
2107 
2108  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2109  long long int thisTask;
2110  long long int numTasks=qureg.numAmpsPerChunk>>1;
2111  long long int chunkSize=qureg.numAmpsPerChunk;
2112  long long int chunkId=qureg.chunkId;
2113 
2114  int controlBit;
2115 
2116  // set dimensions
2117  sizeHalfBlock = 1LL << targetQubit;
2118  sizeBlock = 2LL * sizeHalfBlock;
2119 
2120  // Can't use qureg.stateVec as a private OMP var
2121  qreal *stateVecReal = qureg.stateVec.real;
2122  qreal *stateVecImag = qureg.stateVec.imag;
2123  qreal alphaImag=alpha.imag, alphaReal=alpha.real;
2124  qreal betaImag=beta.imag, betaReal=beta.real;
2125 
2126 # ifdef _OPENMP
2127 # pragma omp parallel \
2128  default (none) \
2129  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, \
2130  numTasks,chunkId,chunkSize,controlQubit) \
2131  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2132 # endif
2133  {
2134 # ifdef _OPENMP
2135 # pragma omp for schedule (static)
2136 # endif
2137  for (thisTask=0; thisTask<numTasks; thisTask++) {
2138 
2139  thisBlock = thisTask / sizeHalfBlock;
2140  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2141  indexLo = indexUp + sizeHalfBlock;
2142 
2143  controlBit = extractBit (controlQubit, indexUp+chunkId*chunkSize);
2144  if (controlBit){
2145  // store current state vector values in temp variables
2146  stateRealUp = stateVecReal[indexUp];
2147  stateImagUp = stateVecImag[indexUp];
2148 
2149  stateRealLo = stateVecReal[indexLo];
2150  stateImagLo = stateVecImag[indexLo];
2151 
2152  // state[indexUp] = alpha * state[indexUp] - conj(beta) * state[indexLo]
2153  stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
2154  - betaReal*stateRealLo - betaImag*stateImagLo;
2155  stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
2156  - betaReal*stateImagLo + betaImag*stateRealLo;
2157 
2158  // state[indexLo] = beta * state[indexUp] + conj(alpha) * state[indexLo]
2159  stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
2160  + alphaReal*stateRealLo + alphaImag*stateImagLo;
2161  stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
2162  + alphaReal*stateImagLo - alphaImag*stateRealLo;
2163  }
2164  }
2165  }
2166 
2167 }
2168 
2169 /* ctrlQubitsMask is a bit mask indicating which qubits are control Qubits
2170  * ctrlFlipMask is a bit mask indicating which control qubits should be 'flipped'
2171  * in the condition, i.e. they should have value 0 when the unitary is applied
2172  */
// NOTE(review): the line holding this function's return type and name is
// absent from this listing.
//
// Visits the amplitudes of this chunk in pairs (indexUp, indexLo) which differ
// only in bit targetQubit, and replaces each pair by u * (up, lo) when the
// global index of indexUp, XOR-ed with ctrlFlipMask, has every bit of
// ctrlQubitsMask set. Other pairs are left unchanged.
2174  Qureg qureg, int targetQubit,
2175  long long int ctrlQubitsMask, long long int ctrlFlipMask,
2176  ComplexMatrix2 u)
2177 {
2178  long long int sizeBlock, sizeHalfBlock;
2179  long long int thisBlock, // current block
2180  indexUp,indexLo; // current index and corresponding index in lower half block
2181 
2182  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2183  long long int thisTask;
 // one task per pair of amplitudes
2184  long long int numTasks=qureg.numAmpsPerChunk>>1;
 // chunkId*chunkSize converts a chunk-local index into a global one below
2185  long long int chunkSize=qureg.numAmpsPerChunk;
2186  long long int chunkId=qureg.chunkId;
2187 
2188  // set dimensions
2189  sizeHalfBlock = 1LL << targetQubit;
2190  sizeBlock = 2LL * sizeHalfBlock;
2191 
2192  // Can't use qureg.stateVec as a private OMP var
2193  qreal *stateVecReal = qureg.stateVec.real;
2194  qreal *stateVecImag = qureg.stateVec.imag;
2195 
2196 # ifdef _OPENMP
2197 # pragma omp parallel \
2198  default (none) \
2199  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u, ctrlQubitsMask,ctrlFlipMask, \
2200  numTasks,chunkId,chunkSize) \
2201  private (thisTask,thisBlock, indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2202 # endif
2203  {
2204 # ifdef _OPENMP
2205 # pragma omp for schedule (static)
2206 # endif
2207  for (thisTask=0; thisTask<numTasks; thisTask++) {
2208 
 // locate the pair: block number, then the same offset in its upper and lower half
2209  thisBlock = thisTask / sizeHalfBlock;
2210  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2211  indexLo = indexUp + sizeHalfBlock;
2212 
2213 
2214  // take the basis index, flip the designated (XOR) 'control' bits, AND with the controls.
2215  // if this equals the control mask, the control qubits have the desired values in the basis index
2216  if (ctrlQubitsMask == (ctrlQubitsMask & ((indexUp+chunkId*chunkSize) ^ ctrlFlipMask))) {
2217  // store current state vector values in temp variables
2218  stateRealUp = stateVecReal[indexUp];
2219  stateImagUp = stateVecImag[indexUp];
2220 
2221  stateRealLo = stateVecReal[indexLo];
2222  stateImagLo = stateVecImag[indexLo];
2223 
2224  // state[indexUp] = u00 * state[indexUp] + u01 * state[indexLo]
2225  stateVecReal[indexUp] = u.real[0][0]*stateRealUp - u.imag[0][0]*stateImagUp
2226  + u.real[0][1]*stateRealLo - u.imag[0][1]*stateImagLo;
2227  stateVecImag[indexUp] = u.real[0][0]*stateImagUp + u.imag[0][0]*stateRealUp
2228  + u.real[0][1]*stateImagLo + u.imag[0][1]*stateRealLo;
2229 
2230  // state[indexLo] = u10 * state[indexUp] + u11 * state[indexLo]
2231  stateVecReal[indexLo] = u.real[1][0]*stateRealUp - u.imag[1][0]*stateImagUp
2232  + u.real[1][1]*stateRealLo - u.imag[1][1]*stateImagLo;
2233  stateVecImag[indexLo] = u.real[1][0]*stateImagUp + u.imag[1][0]*stateRealUp
2234  + u.real[1][1]*stateImagLo + u.imag[1][1]*stateRealLo;
2235  }
2236  }
2237  }
2238 
2239 }
2240 
2241 void statevec_controlledUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit,
2242  ComplexMatrix2 u)
2243 {
2244  long long int sizeBlock, sizeHalfBlock;
2245  long long int thisBlock, // current block
2246  indexUp,indexLo; // current index and corresponding index in lower half block
2247 
2248  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2249  long long int thisTask;
2250  long long int numTasks=qureg.numAmpsPerChunk>>1;
2251  long long int chunkSize=qureg.numAmpsPerChunk;
2252  long long int chunkId=qureg.chunkId;
2253 
2254  int controlBit;
2255 
2256  // set dimensions
2257  sizeHalfBlock = 1LL << targetQubit;
2258  sizeBlock = 2LL * sizeHalfBlock;
2259 
2260  // Can't use qureg.stateVec as a private OMP var
2261  qreal *stateVecReal = qureg.stateVec.real;
2262  qreal *stateVecImag = qureg.stateVec.imag;
2263 
2264 # ifdef _OPENMP
2265 # pragma omp parallel \
2266  default (none) \
2267  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks,chunkId,chunkSize,controlQubit) \
2268  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2269 # endif
2270  {
2271 # ifdef _OPENMP
2272 # pragma omp for schedule (static)
2273 # endif
2274  for (thisTask=0; thisTask<numTasks; thisTask++) {
2275 
2276  thisBlock = thisTask / sizeHalfBlock;
2277  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2278  indexLo = indexUp + sizeHalfBlock;
2279 
2280  controlBit = extractBit (controlQubit, indexUp+chunkId*chunkSize);
2281  if (controlBit){
2282  // store current state vector values in temp variables
2283  stateRealUp = stateVecReal[indexUp];
2284  stateImagUp = stateVecImag[indexUp];
2285 
2286  stateRealLo = stateVecReal[indexLo];
2287  stateImagLo = stateVecImag[indexLo];
2288 
2289 
2290  // state[indexUp] = u00 * state[indexUp] + u01 * state[indexLo]
2291  stateVecReal[indexUp] = u.real[0][0]*stateRealUp - u.imag[0][0]*stateImagUp
2292  + u.real[0][1]*stateRealLo - u.imag[0][1]*stateImagLo;
2293  stateVecImag[indexUp] = u.real[0][0]*stateImagUp + u.imag[0][0]*stateRealUp
2294  + u.real[0][1]*stateImagLo + u.imag[0][1]*stateRealLo;
2295 
2296  // state[indexLo] = u10 * state[indexUp] + u11 * state[indexLo]
2297  stateVecReal[indexLo] = u.real[1][0]*stateRealUp - u.imag[1][0]*stateImagUp
2298  + u.real[1][1]*stateRealLo - u.imag[1][1]*stateImagLo;
2299  stateVecImag[indexLo] = u.real[1][0]*stateImagUp + u.imag[1][0]*stateRealUp
2300  + u.real[1][1]*stateImagLo + u.imag[1][1]*stateRealLo;
2301  }
2302  }
2303  }
2304 
2305 }
2306 
// NOTE(review): the line holding this function's return type, name and first
// parameter(s) is absent from this listing; `qureg` and `controlQubit`, read
// below, are presumably declared on it.
//
// For every k in [0, qureg.numAmpsPerChunk) whose global index has bit
// controlQubit set, this stores
//     stateVecOut[k] = rot1 * stateVecUp[k] + conj(rot2) * stateVecLo[k]
// Other elements of stateVecOut are not written.
2320  Complex rot1, Complex rot2,
2321  ComplexArray stateVecUp,
2322  ComplexArray stateVecLo,
2323  ComplexArray stateVecOut)
2324 {
2325 
2326  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2327  long long int thisTask;
2328  long long int numTasks=qureg.numAmpsPerChunk;
 // chunkId*chunkSize converts a chunk-local index into a global one below
2329  long long int chunkSize=qureg.numAmpsPerChunk;
2330  long long int chunkId=qureg.chunkId;
2331 
2332  int controlBit;
2333 
 // unpack the struct members into plain locals so they can be listed in the OMP clauses
2334  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2335  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2336  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2337  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2338  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2339 
2340 # ifdef _OPENMP
2341 # pragma omp parallel \
2342  default (none) \
2343  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2344  rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks,chunkId,chunkSize,controlQubit) \
2345  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2346 # endif
2347  {
2348 # ifdef _OPENMP
2349 # pragma omp for schedule (static)
2350 # endif
2351  for (thisTask=0; thisTask<numTasks; thisTask++) {
 // the control test uses the global index of this element
2352  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2353  if (controlBit){
2354  // store current state vector values in temp variables
2355  stateRealUp = stateVecRealUp[thisTask];
2356  stateImagUp = stateVecImagUp[thisTask];
2357 
2358  stateRealLo = stateVecRealLo[thisTask];
2359  stateImagLo = stateVecImagLo[thisTask];
2360 
2361  // out = rot1 * up + conj(rot2) * lo (real part, then imaginary part)
2362  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2363  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2364  }
2365  }
2366  }
2367 }
2368 
2381 void statevec_controlledUnitaryDistributed (Qureg qureg, int controlQubit,
2382  Complex rot1, Complex rot2,
2383  ComplexArray stateVecUp,
2384  ComplexArray stateVecLo,
2385  ComplexArray stateVecOut)
2386 {
2387 
2388  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2389  long long int thisTask;
2390  long long int numTasks=qureg.numAmpsPerChunk;
2391  long long int chunkSize=qureg.numAmpsPerChunk;
2392  long long int chunkId=qureg.chunkId;
2393 
2394  int controlBit;
2395 
2396  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2397  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2398  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2399  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2400  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2401 
2402 # ifdef _OPENMP
2403 # pragma omp parallel \
2404  default (none) \
2405  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2406  rot1Real,rot1Imag, rot2Real,rot2Imag, numTasks,chunkId,chunkSize,controlQubit) \
2407  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2408 # endif
2409  {
2410 # ifdef _OPENMP
2411 # pragma omp for schedule (static)
2412 # endif
2413  for (thisTask=0; thisTask<numTasks; thisTask++) {
2414  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2415  if (controlBit){
2416  // store current state vector values in temp variables
2417  stateRealUp = stateVecRealUp[thisTask];
2418  stateImagUp = stateVecImagUp[thisTask];
2419 
2420  stateRealLo = stateVecRealLo[thisTask];
2421  stateImagLo = stateVecImagLo[thisTask];
2422 
2423  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2424  + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2425  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2426  + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2427  }
2428  }
2429  }
2430 }
2431 
// NOTE(review): the line holding this function's return type and name is
// absent from this listing.
//
// For every k in [0, qureg.numAmpsPerChunk) whose global index, XOR-ed with
// ctrlFlipMask, has every bit of ctrlQubitsMask set, this stores
//     stateVecOut[k] = rot1 * stateVecUp[k] + rot2 * stateVecLo[k]
// Other elements of stateVecOut are not written. targetQubit is not read in
// the body below.
2448  Qureg qureg,
2449  int targetQubit,
2450  long long int ctrlQubitsMask, long long int ctrlFlipMask,
2451  Complex rot1, Complex rot2,
2452  ComplexArray stateVecUp,
2453  ComplexArray stateVecLo,
2454  ComplexArray stateVecOut)
2455 {
2456 
2457  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2458  long long int thisTask;
2459  long long int numTasks=qureg.numAmpsPerChunk;
 // chunkId*chunkSize converts a chunk-local index into a global one below
2460  long long int chunkSize=qureg.numAmpsPerChunk;
2461  long long int chunkId=qureg.chunkId;
2462 
 // unpack the struct members into plain locals so they can be listed in the OMP clauses
2463  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2464  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2465  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2466  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2467  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2468 
2469 # ifdef _OPENMP
2470 # pragma omp parallel \
2471  default (none) \
2472  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2473  rot1Real,rot1Imag, rot2Real,rot2Imag, ctrlQubitsMask,ctrlFlipMask, numTasks,chunkId,chunkSize) \
2474  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2475 # endif
2476  {
2477 # ifdef _OPENMP
2478 # pragma omp for schedule (static)
2479 # endif
2480  for (thisTask=0; thisTask<numTasks; thisTask++) {
 // flip the bits named in ctrlFlipMask, then require all control bits to be set
2481  if (ctrlQubitsMask == (ctrlQubitsMask & ((thisTask+chunkId*chunkSize) ^ ctrlFlipMask))) {
2482  // store current state vector values in temp variables
2483  stateRealUp = stateVecRealUp[thisTask];
2484  stateImagUp = stateVecImagUp[thisTask];
2485 
2486  stateRealLo = stateVecRealLo[thisTask];
2487  stateImagLo = stateVecImagLo[thisTask];
2488 
 // out = rot1 * up + rot2 * lo (real part, then imaginary part)
2489  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2490  + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2491  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2492  + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2493  }
2494  }
2495  }
2496 }
2497 
2498 void statevec_pauliXLocal(Qureg qureg, int targetQubit)
2499 {
2500  long long int sizeBlock, sizeHalfBlock;
2501  long long int thisBlock, // current block
2502  indexUp,indexLo; // current index and corresponding index in lower half block
2503 
2504  qreal stateRealUp,stateImagUp;
2505  long long int thisTask;
2506  long long int numTasks=qureg.numAmpsPerChunk>>1;
2507 
2508  // set dimensions
2509  sizeHalfBlock = 1LL << targetQubit;
2510  sizeBlock = 2LL * sizeHalfBlock;
2511 
2512  // Can't use qureg.stateVec as a private OMP var
2513  qreal *stateVecReal = qureg.stateVec.real;
2514  qreal *stateVecImag = qureg.stateVec.imag;
2515 
2516 # ifdef _OPENMP
2517 # pragma omp parallel \
2518  default (none) \
2519  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks) \
2520  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2521 # endif
2522  {
2523 # ifdef _OPENMP
2524 # pragma omp for schedule (static)
2525 # endif
2526  for (thisTask=0; thisTask<numTasks; thisTask++) {
2527  thisBlock = thisTask / sizeHalfBlock;
2528  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2529  indexLo = indexUp + sizeHalfBlock;
2530 
2531  stateRealUp = stateVecReal[indexUp];
2532  stateImagUp = stateVecImag[indexUp];
2533 
2534  stateVecReal[indexUp] = stateVecReal[indexLo];
2535  stateVecImag[indexUp] = stateVecImag[indexLo];
2536 
2537  stateVecReal[indexLo] = stateRealUp;
2538  stateVecImag[indexLo] = stateImagUp;
2539  }
2540  }
2541 
2542 }
2543 
// NOTE(review): the line holding this function's return type, name and first
// parameter(s) is absent from this listing; `qureg`, read below, is presumably
// declared on it.
//
// Copies the first qureg.numAmpsPerChunk elements of stateVecIn (real and
// imaginary arrays) into stateVecOut, element by element.
2557  ComplexArray stateVecIn,
2558  ComplexArray stateVecOut)
2559 {
2560 
2561  long long int thisTask;
2562  long long int numTasks=qureg.numAmpsPerChunk;
2563 
 // unpack the struct members into plain locals so they can be listed in the OMP clauses
2564  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2565  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2566 
2567 # ifdef _OPENMP
2568 # pragma omp parallel \
2569  default (none) \
2570  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut,numTasks) \
2571  private (thisTask)
2572 # endif
2573  {
2574 # ifdef _OPENMP
2575 # pragma omp for schedule (static)
2576 # endif
2577  for (thisTask=0; thisTask<numTasks; thisTask++) {
2578  stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2579  stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
2580  }
2581  }
2582 }
2583 
2584 void statevec_controlledNotLocal(Qureg qureg, int controlQubit, int targetQubit)
2585 {
2586  long long int sizeBlock, sizeHalfBlock;
2587  long long int thisBlock, // current block
2588  indexUp,indexLo; // current index and corresponding index in lower half block
2589 
2590  qreal stateRealUp,stateImagUp;
2591  long long int thisTask;
2592  long long int numTasks=qureg.numAmpsPerChunk>>1;
2593  long long int chunkSize=qureg.numAmpsPerChunk;
2594  long long int chunkId=qureg.chunkId;
2595 
2596  int controlBit;
2597 
2598  // set dimensions
2599  sizeHalfBlock = 1LL << targetQubit;
2600  sizeBlock = 2LL * sizeHalfBlock;
2601 
2602  // Can't use qureg.stateVec as a private OMP var
2603  qreal *stateVecReal = qureg.stateVec.real;
2604  qreal *stateVecImag = qureg.stateVec.imag;
2605 
2606 # ifdef _OPENMP
2607 # pragma omp parallel \
2608  default (none) \
2609  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,numTasks,chunkId,chunkSize,controlQubit) \
2610  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
2611 # endif
2612  {
2613 # ifdef _OPENMP
2614 # pragma omp for schedule (static)
2615 # endif
2616  for (thisTask=0; thisTask<numTasks; thisTask++) {
2617  thisBlock = thisTask / sizeHalfBlock;
2618  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2619  indexLo = indexUp + sizeHalfBlock;
2620 
2621  controlBit = extractBit(controlQubit, indexUp+chunkId*chunkSize);
2622  if (controlBit){
2623  stateRealUp = stateVecReal[indexUp];
2624  stateImagUp = stateVecImag[indexUp];
2625 
2626  stateVecReal[indexUp] = stateVecReal[indexLo];
2627  stateVecImag[indexUp] = stateVecImag[indexLo];
2628 
2629  stateVecReal[indexLo] = stateRealUp;
2630  stateVecImag[indexLo] = stateImagUp;
2631  }
2632  }
2633  }
2634 }
2635 
2646 void statevec_controlledNotDistributed (Qureg qureg, int controlQubit,
2647  ComplexArray stateVecIn,
2648  ComplexArray stateVecOut)
2649 {
2650 
2651  long long int thisTask;
2652  long long int numTasks=qureg.numAmpsPerChunk;
2653  long long int chunkSize=qureg.numAmpsPerChunk;
2654  long long int chunkId=qureg.chunkId;
2655 
2656  int controlBit;
2657 
2658  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2659  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2660 
2661 # ifdef _OPENMP
2662 # pragma omp parallel \
2663  default (none) \
2664  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2665  numTasks,chunkId,chunkSize,controlQubit) \
2666  private (thisTask,controlBit)
2667 # endif
2668  {
2669 # ifdef _OPENMP
2670 # pragma omp for schedule (static)
2671 # endif
2672  for (thisTask=0; thisTask<numTasks; thisTask++) {
2673  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2674  if (controlBit){
2675  stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2676  stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
2677  }
2678  }
2679  }
2680 }
2681 
2682 void statevec_pauliYLocal(Qureg qureg, int targetQubit, int conjFac)
2683 {
2684  long long int sizeBlock, sizeHalfBlock;
2685  long long int thisBlock, // current block
2686  indexUp,indexLo; // current index and corresponding index in lower half block
2687 
2688  qreal stateRealUp,stateImagUp;
2689  long long int thisTask;
2690  long long int numTasks=qureg.numAmpsPerChunk>>1;
2691 
2692  // set dimensions
2693  sizeHalfBlock = 1LL << targetQubit;
2694  sizeBlock = 2LL * sizeHalfBlock;
2695 
2696  // Can't use qureg.stateVec as a private OMP var
2697  qreal *stateVecReal = qureg.stateVec.real;
2698  qreal *stateVecImag = qureg.stateVec.imag;
2699 
2700 # ifdef _OPENMP
2701 # pragma omp parallel \
2702  default (none) \
2703  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,conjFac) \
2704  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2705 # endif
2706  {
2707 # ifdef _OPENMP
2708 # pragma omp for schedule (static)
2709 # endif
2710  for (thisTask=0; thisTask<numTasks; thisTask++) {
2711  thisBlock = thisTask / sizeHalfBlock;
2712  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2713  indexLo = indexUp + sizeHalfBlock;
2714 
2715  stateRealUp = stateVecReal[indexUp];
2716  stateImagUp = stateVecImag[indexUp];
2717 
2718  stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
2719  stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
2720  stateVecReal[indexLo] = conjFac * -stateImagUp;
2721  stateVecImag[indexLo] = conjFac * stateRealUp;
2722  }
2723  }
2724 }
2725 
2740  ComplexArray stateVecIn,
2741  ComplexArray stateVecOut,
2742  int updateUpper, int conjFac)
2743 {
2744 
2745  long long int thisTask;
2746  long long int numTasks=qureg.numAmpsPerChunk;
2747 
2748  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2749  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2750 
2751  int realSign=1, imagSign=1;
2752  if (updateUpper) imagSign=-1;
2753  else realSign = -1;
2754 
2755 # ifdef _OPENMP
2756 # pragma omp parallel \
2757  default (none) \
2758  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2759  realSign,imagSign, numTasks,conjFac) \
2760  private (thisTask)
2761 # endif
2762  {
2763 # ifdef _OPENMP
2764 # pragma omp for schedule (static)
2765 # endif
2766  for (thisTask=0; thisTask<numTasks; thisTask++) {
2767  stateVecRealOut[thisTask] = conjFac * realSign * stateVecImagIn[thisTask];
2768  stateVecImagOut[thisTask] = conjFac * imagSign * stateVecRealIn[thisTask];
2769  }
2770  }
2771 }
2772 
2773 
2774 
2775 
2776 void statevec_controlledPauliYLocal(Qureg qureg, int controlQubit, int targetQubit, int conjFac)
2777 {
2778  long long int sizeBlock, sizeHalfBlock;
2779  long long int thisBlock, // current block
2780  indexUp,indexLo; // current index and corresponding index in lower half block
2781 
2782  qreal stateRealUp,stateImagUp;
2783  long long int thisTask;
2784  long long int numTasks=qureg.numAmpsPerChunk>>1;
2785  long long int chunkSize=qureg.numAmpsPerChunk;
2786  long long int chunkId=qureg.chunkId;
2787 
2788  int controlBit;
2789 
2790  // set dimensions
2791  sizeHalfBlock = 1LL << targetQubit;
2792  sizeBlock = 2LL * sizeHalfBlock;
2793 
2794  // Can't use qureg.stateVec as a private OMP var
2795  qreal *stateVecReal = qureg.stateVec.real;
2796  qreal *stateVecImag = qureg.stateVec.imag;
2797 
2798 # ifdef _OPENMP
2799 # pragma omp parallel \
2800  default (none) \
2801  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,chunkId, \
2802  chunkSize,controlQubit,conjFac) \
2803  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
2804 # endif
2805  {
2806 # ifdef _OPENMP
2807 # pragma omp for schedule (static)
2808 # endif
2809  for (thisTask=0; thisTask<numTasks; thisTask++) {
2810  thisBlock = thisTask / sizeHalfBlock;
2811  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2812  indexLo = indexUp + sizeHalfBlock;
2813 
2814  controlBit = extractBit(controlQubit, indexUp+chunkId*chunkSize);
2815  if (controlBit){
2816  stateRealUp = stateVecReal[indexUp];
2817  stateImagUp = stateVecImag[indexUp];
2818 
2819  // update under +-{{0, -i}, {i, 0}}
2820  stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
2821  stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
2822  stateVecReal[indexLo] = conjFac * -stateImagUp;
2823  stateVecImag[indexLo] = conjFac * stateRealUp;
2824  }
2825  }
2826  }
2827 }
2828 
2829 
2830 void statevec_controlledPauliYDistributed (Qureg qureg, int controlQubit,
2831  ComplexArray stateVecIn,
2832  ComplexArray stateVecOut, int conjFac)
2833 {
2834 
2835  long long int thisTask;
2836  long long int numTasks=qureg.numAmpsPerChunk;
2837  long long int chunkSize=qureg.numAmpsPerChunk;
2838  long long int chunkId=qureg.chunkId;
2839 
2840  int controlBit;
2841 
2842  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2843  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2844 
2845 # ifdef _OPENMP
2846 # pragma omp parallel \
2847  default (none) \
2848  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2849  numTasks,chunkId,chunkSize,controlQubit,conjFac) \
2850  private (thisTask,controlBit)
2851 # endif
2852  {
2853 # ifdef _OPENMP
2854 # pragma omp for schedule (static)
2855 # endif
2856  for (thisTask=0; thisTask<numTasks; thisTask++) {
2857  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2858  if (controlBit){
2859  stateVecRealOut[thisTask] = conjFac * stateVecImagIn[thisTask];
2860  stateVecImagOut[thisTask] = conjFac * -stateVecRealIn[thisTask];
2861  }
2862  }
2863  }
2864 }
2865 
2866 
2867 
2868 
2869 
2870 
2871 
2872 void statevec_hadamardLocal(Qureg qureg, int targetQubit)
2873 {
2874  long long int sizeBlock, sizeHalfBlock;
2875  long long int thisBlock, // current block
2876  indexUp,indexLo; // current index and corresponding index in lower half block
2877 
2878  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2879  long long int thisTask;
2880  long long int numTasks=qureg.numAmpsPerChunk>>1;
2881 
2882  // set dimensions
2883  sizeHalfBlock = 1LL << targetQubit;
2884  sizeBlock = 2LL * sizeHalfBlock;
2885 
2886  // Can't use qureg.stateVec as a private OMP var
2887  qreal *stateVecReal = qureg.stateVec.real;
2888  qreal *stateVecImag = qureg.stateVec.imag;
2889 
2890  qreal recRoot2 = 1.0/sqrt(2);
2891 
2892 # ifdef _OPENMP
2893 # pragma omp parallel \
2894  default (none) \
2895  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, recRoot2, numTasks) \
2896  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2897 # endif
2898  {
2899 # ifdef _OPENMP
2900 # pragma omp for schedule (static)
2901 # endif
2902  for (thisTask=0; thisTask<numTasks; thisTask++) {
2903  thisBlock = thisTask / sizeHalfBlock;
2904  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2905  indexLo = indexUp + sizeHalfBlock;
2906 
2907  stateRealUp = stateVecReal[indexUp];
2908  stateImagUp = stateVecImag[indexUp];
2909 
2910  stateRealLo = stateVecReal[indexLo];
2911  stateImagLo = stateVecImag[indexLo];
2912 
2913  stateVecReal[indexUp] = recRoot2*(stateRealUp + stateRealLo);
2914  stateVecImag[indexUp] = recRoot2*(stateImagUp + stateImagLo);
2915 
2916  stateVecReal[indexLo] = recRoot2*(stateRealUp - stateRealLo);
2917  stateVecImag[indexLo] = recRoot2*(stateImagUp - stateImagLo);
2918  }
2919  }
2920 }
2921 
2933  ComplexArray stateVecUp,
2934  ComplexArray stateVecLo,
2935  ComplexArray stateVecOut,
2936  int updateUpper)
2937 {
2938 
2939  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2940  long long int thisTask;
2941  long long int numTasks=qureg.numAmpsPerChunk;
2942 
2943  int sign;
2944  if (updateUpper) sign=1;
2945  else sign=-1;
2946 
2947  qreal recRoot2 = 1.0/sqrt(2);
2948 
2949  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2950  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2951  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2952 
2953 # ifdef _OPENMP
2954 # pragma omp parallel \
2955  default (none) \
2956  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2957  recRoot2, sign, numTasks) \
2958  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2959 # endif
2960  {
2961 # ifdef _OPENMP
2962 # pragma omp for schedule (static)
2963 # endif
2964  for (thisTask=0; thisTask<numTasks; thisTask++) {
2965  // store current state vector values in temp variables
2966  stateRealUp = stateVecRealUp[thisTask];
2967  stateImagUp = stateVecImagUp[thisTask];
2968 
2969  stateRealLo = stateVecRealLo[thisTask];
2970  stateImagLo = stateVecImagLo[thisTask];
2971 
2972  stateVecRealOut[thisTask] = recRoot2*(stateRealUp + sign*stateRealLo);
2973  stateVecImagOut[thisTask] = recRoot2*(stateImagUp + sign*stateImagLo);
2974  }
2975  }
2976 }
2977 
2978 void statevec_phaseShiftByTerm (Qureg qureg, int targetQubit, Complex term)
2979 {
2980  long long int index;
2981  long long int stateVecSize;
2982  int targetBit;
2983 
2984  long long int chunkSize=qureg.numAmpsPerChunk;
2985  long long int chunkId=qureg.chunkId;
2986 
2987  // dimension of the state vector
2988  stateVecSize = qureg.numAmpsPerChunk;
2989  qreal *stateVecReal = qureg.stateVec.real;
2990  qreal *stateVecImag = qureg.stateVec.imag;
2991 
2992  qreal stateRealLo, stateImagLo;
2993  qreal cosAngle = term.real;
2994  qreal sinAngle = term.imag;
2995 
2996 # ifdef _OPENMP
2997 # pragma omp parallel for \
2998  default (none) \
2999  shared (stateVecSize, stateVecReal,stateVecImag, cosAngle,sinAngle, \
3000  chunkId,chunkSize,targetQubit) \
3001  private (index,targetBit,stateRealLo,stateImagLo) \
3002  schedule (static)
3003 # endif
3004  for (index=0; index<stateVecSize; index++) {
3005 
3006  // update the coeff of the |1> state of the target qubit
3007  targetBit = extractBit (targetQubit, index+chunkId*chunkSize);
3008  if (targetBit) {
3009 
3010  stateRealLo = stateVecReal[index];
3011  stateImagLo = stateVecImag[index];
3012 
3013  stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3014  stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3015  }
3016  }
3017 }
3018 
3019 void statevec_controlledPhaseShift (Qureg qureg, int idQubit1, int idQubit2, qreal angle)
3020 {
3021  long long int index;
3022  long long int stateVecSize;
3023  int bit1, bit2;
3024 
3025  long long int chunkSize=qureg.numAmpsPerChunk;
3026  long long int chunkId=qureg.chunkId;
3027 
3028  // dimension of the state vector
3029  stateVecSize = qureg.numAmpsPerChunk;
3030  qreal *stateVecReal = qureg.stateVec.real;
3031  qreal *stateVecImag = qureg.stateVec.imag;
3032 
3033  qreal stateRealLo, stateImagLo;
3034  qreal cosAngle = cos(angle);
3035  qreal sinAngle = sin(angle);
3036 
3037 # ifdef _OPENMP
3038 # pragma omp parallel for \
3039  default (none) \
3040  shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize, \
3041  idQubit1,idQubit2,cosAngle,sinAngle ) \
3042  private (index,bit1,bit2,stateRealLo,stateImagLo) \
3043  schedule (static)
3044 # endif
3045  for (index=0; index<stateVecSize; index++) {
3046  bit1 = extractBit (idQubit1, index+chunkId*chunkSize);
3047  bit2 = extractBit (idQubit2, index+chunkId*chunkSize);
3048  if (bit1 && bit2) {
3049 
3050  stateRealLo = stateVecReal[index];
3051  stateImagLo = stateVecImag[index];
3052 
3053  stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3054  stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3055  }
3056  }
3057 }
3058 
3059 void statevec_multiControlledPhaseShift(Qureg qureg, int *controlQubits, int numControlQubits, qreal angle)
3060 {
3061  long long int index;
3062  long long int stateVecSize;
3063 
3064  long long int chunkSize=qureg.numAmpsPerChunk;
3065  long long int chunkId=qureg.chunkId;
3066 
3067  long long int mask = getQubitBitMask(controlQubits, numControlQubits);
3068 
3069  stateVecSize = qureg.numAmpsPerChunk;
3070  qreal *stateVecReal = qureg.stateVec.real;
3071  qreal *stateVecImag = qureg.stateVec.imag;
3072 
3073  qreal stateRealLo, stateImagLo;
3074  qreal cosAngle = cos(angle);
3075  qreal sinAngle = sin(angle);
3076 
3077 # ifdef _OPENMP
3078 # pragma omp parallel \
3079  default (none) \
3080  shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3081  private (index, stateRealLo, stateImagLo)
3082 # endif
3083  {
3084 # ifdef _OPENMP
3085 # pragma omp for schedule (static)
3086 # endif
3087  for (index=0; index<stateVecSize; index++) {
3088  if (mask == (mask & (index+chunkId*chunkSize)) ){
3089 
3090  stateRealLo = stateVecReal[index];
3091  stateImagLo = stateVecImag[index];
3092 
3093  stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3094  stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3095  }
3096  }
3097  }
3098 }
3099 
/**
 * Returns the parity of the number of set bits in mask.
 *
 * The loop clears the lowest set bit on each pass and toggles the result, so
 * it runs once per set bit. The work is done on an unsigned copy: with the
 * signed value, `mask - 1` overflows (undefined behaviour) as soon as mask
 * equals LLONG_MIN, i.e. when bit 63 is the only bit left.
 *
 * @param[in] mask bit pattern to inspect (all 64 bits are counted)
 * @return 1 if an odd number of bits are set, 0 otherwise
 */
int getBitMaskParity(long long int mask) {
    unsigned long long bits = (unsigned long long) mask;
    int parity = 0;
    while (bits) {
        parity = !parity;
        bits &= bits - 1ULL;    // drop the lowest set bit
    }
    return parity;
}
3108 
3109 void statevec_multiRotateZ(Qureg qureg, long long int mask, qreal angle)
3110 {
3111  long long int index;
3112  long long int stateVecSize;
3113 
3114  long long int chunkSize=qureg.numAmpsPerChunk;
3115  long long int chunkId=qureg.chunkId;
3116 
3117  stateVecSize = qureg.numAmpsPerChunk;
3118  qreal *stateVecReal = qureg.stateVec.real;
3119  qreal *stateVecImag = qureg.stateVec.imag;
3120 
3121  qreal stateReal, stateImag;
3122  qreal cosAngle = cos(angle/2.0);
3123  qreal sinAngle = sin(angle/2.0);
3124 
3125  // = +-1, to flip sinAngle based on target qubit parity, to effect
3126  // exp(-angle/2 i fac_j)|j>
3127  int fac;
3128 
3129 # ifdef _OPENMP
3130 # pragma omp parallel \
3131  default (none) \
3132  shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3133  private (index, fac, stateReal, stateImag)
3134 # endif
3135  {
3136 # ifdef _OPENMP
3137 # pragma omp for schedule (static)
3138 # endif
3139  for (index=0; index<stateVecSize; index++) {
3140  stateReal = stateVecReal[index];
3141  stateImag = stateVecImag[index];
3142 
3143  // odd-parity target qubits get fac_j = -1
3144  fac = getBitMaskParity(mask & (index+chunkId*chunkSize))? -1 : 1;
3145  stateVecReal[index] = cosAngle*stateReal + fac * sinAngle*stateImag;
3146  stateVecImag[index] = - fac * sinAngle*stateReal + cosAngle*stateImag;
3147  }
3148  }
3149 }
3150 
3152 
3153  // computes first local index containing a diagonal element
3154  long long int localNumAmps = qureg.numAmpsPerChunk;
3155  long long int densityDim = (1LL << qureg.numQubitsRepresented);
3156  long long int diagSpacing = 1LL + densityDim;
3157  long long int maxNumDiagsPerChunk = 1 + localNumAmps / diagSpacing;
3158  long long int numPrevDiags = (qureg.chunkId>0)? 1+(qureg.chunkId*localNumAmps)/diagSpacing : 0;
3159  long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3160  long long int localIndNextDiag = globalIndNextDiag % localNumAmps;
3161 
3162  // computes how many diagonals are contained in this chunk
3163  long long int numDiagsInThisChunk = maxNumDiagsPerChunk;
3164  if (localIndNextDiag + (numDiagsInThisChunk-1)*diagSpacing >= localNumAmps)
3165  numDiagsInThisChunk -= 1;
3166 
3167  long long int visitedDiags; // number of visited diagonals in this chunk so far
3168  long long int basisStateInd; // current diagonal index being considered
3169  long long int index; // index in the local chunk
3170 
3171  qreal zeroProb = 0;
3172  qreal *stateVecReal = qureg.stateVec.real;
3173 
3174 # ifdef _OPENMP
3175 # pragma omp parallel \
3176  shared (localIndNextDiag, numPrevDiags, diagSpacing, stateVecReal, numDiagsInThisChunk) \
3177  private (visitedDiags, basisStateInd, index) \
3178  reduction ( +:zeroProb )
3179 # endif
3180  {
3181 # ifdef _OPENMP
3182 # pragma omp for schedule (static)
3183 # endif
3184  // sums the diagonal elems of the density matrix where measureQubit=0
3185  for (visitedDiags = 0; visitedDiags < numDiagsInThisChunk; visitedDiags++) {
3186 
3187  basisStateInd = numPrevDiags + visitedDiags;
3188  index = localIndNextDiag + diagSpacing * visitedDiags;
3189 
3190  if (extractBit(measureQubit, basisStateInd) == 0)
3191  zeroProb += stateVecReal[index]; // assume imag[diagonls] ~ 0
3192 
3193  }
3194  }
3195 
3196  return zeroProb;
3197 }
3198 
3207  int measureQubit)
3208 {
3209  // ----- sizes
3210  long long int sizeBlock, // size of blocks
3211  sizeHalfBlock; // size of blocks halved
3212  // ----- indices
3213  long long int thisBlock, // current block
3214  index; // current index for first half block
3215  // ----- measured probability
3216  qreal totalProbability; // probability (returned) value
3217  // ----- temp variables
3218  long long int thisTask;
3219  long long int numTasks=qureg.numAmpsPerChunk>>1;
3220 
3221  // ---------------------------------------------------------------- //
3222  // dimensions //
3223  // ---------------------------------------------------------------- //
3224  sizeHalfBlock = 1LL << (measureQubit); // number of state vector elements to sum,
3225  // and then the number to skip
3226  sizeBlock = 2LL * sizeHalfBlock; // size of blocks (pairs of measure and skip entries)
3227 
3228  // initialise returned value
3229  totalProbability = 0.0;
3230 
3231  qreal *stateVecReal = qureg.stateVec.real;
3232  qreal *stateVecImag = qureg.stateVec.imag;
3233 
3234 # ifdef _OPENMP
3235 # pragma omp parallel \
3236  shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag) \
3237  private (thisTask,thisBlock,index) \
3238  reduction ( +:totalProbability )
3239 # endif
3240  {
3241 # ifdef _OPENMP
3242 # pragma omp for schedule (static)
3243 # endif
3244  for (thisTask=0; thisTask<numTasks; thisTask++) {
3245  thisBlock = thisTask / sizeHalfBlock;
3246  index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3247 
3248  totalProbability += stateVecReal[index]*stateVecReal[index]
3249  + stateVecImag[index]*stateVecImag[index];
3250  }
3251  }
3252  return totalProbability;
3253 }
3254 
3263  // ----- measured probability
3264  qreal totalProbability; // probability (returned) value
3265  // ----- temp variables
3266  long long int thisTask; // task based approach for expose loop with small granularity
3267  long long int numTasks=qureg.numAmpsPerChunk;
3268 
3269  // ---------------------------------------------------------------- //
3270  // find probability //
3271  // ---------------------------------------------------------------- //
3272 
3273  // initialise returned value
3274  totalProbability = 0.0;
3275 
3276  qreal *stateVecReal = qureg.stateVec.real;
3277  qreal *stateVecImag = qureg.stateVec.imag;
3278 
3279 # ifdef _OPENMP
3280 # pragma omp parallel \
3281  shared (numTasks,stateVecReal,stateVecImag) \
3282  private (thisTask) \
3283  reduction ( +:totalProbability )
3284 # endif
3285  {
3286 # ifdef _OPENMP
3287 # pragma omp for schedule (static)
3288 # endif
3289  for (thisTask=0; thisTask<numTasks; thisTask++) {
3290  totalProbability += stateVecReal[thisTask]*stateVecReal[thisTask]
3291  + stateVecImag[thisTask]*stateVecImag[thisTask];
3292  }
3293  }
3294 
3295  return totalProbability;
3296 }
3297 
3298 
3299 
3300 void statevec_controlledPhaseFlip (Qureg qureg, int idQubit1, int idQubit2)
3301 {
3302  long long int index;
3303  long long int stateVecSize;
3304  int bit1, bit2;
3305 
3306  long long int chunkSize=qureg.numAmpsPerChunk;
3307  long long int chunkId=qureg.chunkId;
3308 
3309  // dimension of the state vector
3310  stateVecSize = qureg.numAmpsPerChunk;
3311  qreal *stateVecReal = qureg.stateVec.real;
3312  qreal *stateVecImag = qureg.stateVec.imag;
3313 
3314 # ifdef _OPENMP
3315 # pragma omp parallel for \
3316  default (none) \
3317  shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize,idQubit1,idQubit2 ) \
3318  private (index,bit1,bit2) \
3319  schedule (static)
3320 # endif
3321  for (index=0; index<stateVecSize; index++) {
3322  bit1 = extractBit (idQubit1, index+chunkId*chunkSize);
3323  bit2 = extractBit (idQubit2, index+chunkId*chunkSize);
3324  if (bit1 && bit2) {
3325  stateVecReal [index] = - stateVecReal [index];
3326  stateVecImag [index] = - stateVecImag [index];
3327  }
3328  }
3329 }
3330 
3331 void statevec_multiControlledPhaseFlip(Qureg qureg, int *controlQubits, int numControlQubits)
3332 {
3333  long long int index;
3334  long long int stateVecSize;
3335 
3336  long long int chunkSize=qureg.numAmpsPerChunk;
3337  long long int chunkId=qureg.chunkId;
3338 
3339  long long int mask = getQubitBitMask(controlQubits, numControlQubits);
3340 
3341  stateVecSize = qureg.numAmpsPerChunk;
3342  qreal *stateVecReal = qureg.stateVec.real;
3343  qreal *stateVecImag = qureg.stateVec.imag;
3344 
3345 # ifdef _OPENMP
3346 # pragma omp parallel \
3347  default (none) \
3348  shared (stateVecSize, stateVecReal,stateVecImag, mask, chunkId,chunkSize ) \
3349  private (index)
3350 # endif
3351  {
3352 # ifdef _OPENMP
3353 # pragma omp for schedule (static)
3354 # endif
3355  for (index=0; index<stateVecSize; index++) {
3356  if (mask == (mask & (index+chunkId*chunkSize)) ){
3357  stateVecReal [index] = - stateVecReal [index];
3358  stateVecImag [index] = - stateVecImag [index];
3359  }
3360  }
3361  }
3362 }
3363 
3380 void statevec_collapseToKnownProbOutcomeLocal(Qureg qureg, int measureQubit, int outcome, qreal totalProbability)
3381 {
3382  // ----- sizes
3383  long long int sizeBlock, // size of blocks
3384  sizeHalfBlock; // size of blocks halved
3385  // ----- indices
3386  long long int thisBlock, // current block
3387  index; // current index for first half block
3388  // ----- measured probability
3389  qreal renorm; // probability (returned) value
3390  // ----- temp variables
3391  long long int thisTask; // task based approach for expose loop with small granularity
3392  // (good for shared memory parallelism)
3393  long long int numTasks=qureg.numAmpsPerChunk>>1;
3394 
3395  // ---------------------------------------------------------------- //
3396  // dimensions //
3397  // ---------------------------------------------------------------- //
3398  sizeHalfBlock = 1LL << (measureQubit); // number of state vector elements to sum,
3399  // and then the number to skip
3400  sizeBlock = 2LL * sizeHalfBlock; // size of blocks (pairs of measure and skip entries)
3401 
3402  renorm=1/sqrt(totalProbability);
3403  qreal *stateVecReal = qureg.stateVec.real;
3404  qreal *stateVecImag = qureg.stateVec.imag;
3405 
3406 
3407 # ifdef _OPENMP
3408 # pragma omp parallel \
3409  default (none) \
3410  shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,renorm,outcome) \
3411  private (thisTask,thisBlock,index)
3412 # endif
3413  {
3414  if (outcome==0){
3415  // measure qubit is 0
3416 # ifdef _OPENMP
3417 # pragma omp for schedule (static)
3418 # endif
3419  for (thisTask=0; thisTask<numTasks; thisTask++) {
3420  thisBlock = thisTask / sizeHalfBlock;
3421  index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3422  stateVecReal[index]=stateVecReal[index]*renorm;
3423  stateVecImag[index]=stateVecImag[index]*renorm;
3424 
3425  stateVecReal[index+sizeHalfBlock]=0;
3426  stateVecImag[index+sizeHalfBlock]=0;
3427  }
3428  } else {
3429  // measure qubit is 1
3430 # ifdef _OPENMP
3431 # pragma omp for schedule (static)
3432 # endif
3433  for (thisTask=0; thisTask<numTasks; thisTask++) {
3434  thisBlock = thisTask / sizeHalfBlock;
3435  index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3436  stateVecReal[index]=0;
3437  stateVecImag[index]=0;
3438 
3439  stateVecReal[index+sizeHalfBlock]=stateVecReal[index+sizeHalfBlock]*renorm;
3440  stateVecImag[index+sizeHalfBlock]=stateVecImag[index+sizeHalfBlock]*renorm;
3441  }
3442  }
3443  }
3444 
3445 }
3446 
3462 void statevec_collapseToKnownProbOutcomeDistributedRenorm (Qureg qureg, int measureQubit, qreal totalProbability)
3463 {
3464  // ----- temp variables
3465  long long int thisTask;
3466  long long int numTasks=qureg.numAmpsPerChunk;
3467 
3468  qreal renorm=1/sqrt(totalProbability);
3469 
3470  qreal *stateVecReal = qureg.stateVec.real;
3471  qreal *stateVecImag = qureg.stateVec.imag;
3472 
3473 # ifdef _OPENMP
3474 # pragma omp parallel \
3475  shared (numTasks,stateVecReal,stateVecImag) \
3476  private (thisTask)
3477 # endif
3478  {
3479 # ifdef _OPENMP
3480 # pragma omp for schedule (static)
3481 # endif
3482  for (thisTask=0; thisTask<numTasks; thisTask++) {
3483  stateVecReal[thisTask] = stateVecReal[thisTask]*renorm;
3484  stateVecImag[thisTask] = stateVecImag[thisTask]*renorm;
3485  }
3486  }
3487 }
3488 
3502 {
3503  // ----- temp variables
3504  long long int thisTask;
3505  long long int numTasks=qureg.numAmpsPerChunk;
3506 
3507  // ---------------------------------------------------------------- //
3508  // find probability //
3509  // ---------------------------------------------------------------- //
3510 
3511  qreal *stateVecReal = qureg.stateVec.real;
3512  qreal *stateVecImag = qureg.stateVec.imag;
3513 
3514 # ifdef _OPENMP
3515 # pragma omp parallel \
3516  shared (numTasks,stateVecReal,stateVecImag) \
3517  private (thisTask)
3518 # endif
3519  {
3520 # ifdef _OPENMP
3521 # pragma omp for schedule (static)
3522 # endif
3523  for (thisTask=0; thisTask<numTasks; thisTask++) {
3524  stateVecReal[thisTask] = 0;
3525  stateVecImag[thisTask] = 0;
3526  }
3527  }
3528 }
3529 
3536 void statevec_swapQubitAmpsLocal(Qureg qureg, int qb1, int qb2) {
3537 
3538  // can't use qureg.stateVec as a private OMP var
3539  qreal *reVec = qureg.stateVec.real;
3540  qreal *imVec = qureg.stateVec.imag;
3541 
3542  long long int numTasks = qureg.numAmpsPerChunk >> 2; // each iteration updates 2 amps and skips 2 amps
3543  long long int thisTask;
3544  long long int ind00, ind01, ind10;
3545  qreal re01, re10;
3546  qreal im01, im10;
3547 
3548 # ifdef _OPENMP
3549 # pragma omp parallel \
3550  default (none) \
3551  shared (reVec,imVec,numTasks,qb1,qb2) \
3552  private (thisTask, ind00,ind01,ind10, re01,re10, im01,im10)
3553 # endif
3554  {
3555 # ifdef _OPENMP
3556 # pragma omp for schedule (static)
3557 # endif
3558  for (thisTask=0; thisTask<numTasks; thisTask++) {
3559  // determine ind00 of |..0..0..>, |..0..1..> and |..1..0..>
3560  ind00 = insertTwoZeroBits(thisTask, qb1, qb2);
3561  ind01 = flipBit(ind00, qb1);
3562  ind10 = flipBit(ind00, qb2);
3563 
3564  // extract statevec amplitudes
3565  re01 = reVec[ind01]; im01 = imVec[ind01];
3566  re10 = reVec[ind10]; im10 = imVec[ind10];
3567 
3568  // swap 01 and 10 amps
3569  reVec[ind01] = re10; reVec[ind10] = re01;
3570  imVec[ind01] = im10; imVec[ind10] = im01;
3571  }
3572  }
3573 }
3574 
3579 void statevec_swapQubitAmpsDistributed(Qureg qureg, int pairRank, int qb1, int qb2) {
3580 
3581  // can't use qureg.stateVec as a private OMP var
3582  qreal *reVec = qureg.stateVec.real;
3583  qreal *imVec = qureg.stateVec.imag;
3584  qreal *rePairVec = qureg.pairStateVec.real;
3585  qreal *imPairVec = qureg.pairStateVec.imag;
3586 
3587  long long int numLocalAmps = qureg.numAmpsPerChunk;
3588  long long int globalStartInd = qureg.chunkId * numLocalAmps;
3589  long long int pairGlobalStartInd = pairRank * numLocalAmps;
3590 
3591  long long int localInd, globalInd;
3592  long long int pairLocalInd, pairGlobalInd;
3593 
3594 # ifdef _OPENMP
3595 # pragma omp parallel \
3596  default (none) \
3597  shared (reVec,imVec,rePairVec,imPairVec,numLocalAmps,globalStartInd,pairGlobalStartInd,qb1,qb2) \
3598  private (localInd,globalInd, pairLocalInd,pairGlobalInd)
3599 # endif
3600  {
3601 # ifdef _OPENMP
3602 # pragma omp for schedule (static)
3603 # endif
3604  for (localInd=0; localInd < numLocalAmps; localInd++) {
3605 
3606  globalInd = globalStartInd + localInd;
3607  if (isOddParity(globalInd, qb1, qb2)) {
3608 
3609  pairGlobalInd = flipBit(flipBit(globalInd, qb1), qb2);
3610  pairLocalInd = pairGlobalInd - pairGlobalStartInd;
3611 
3612  reVec[localInd] = rePairVec[pairLocalInd];
3613  imVec[localInd] = imPairVec[pairLocalInd];
3614  }
3615  }
3616  }
3617 }
3618 
3619 void statevec_setWeightedQureg(Complex fac1, Qureg qureg1, Complex fac2, Qureg qureg2, Complex facOut, Qureg out) {
3620 
3621  long long int numAmps = qureg1.numAmpsPerChunk;
3622 
3623  qreal *vecRe1 = qureg1.stateVec.real;
3624  qreal *vecIm1 = qureg1.stateVec.imag;
3625  qreal *vecRe2 = qureg2.stateVec.real;
3626  qreal *vecIm2 = qureg2.stateVec.imag;
3627  qreal *vecReOut = out.stateVec.real;
3628  qreal *vecImOut = out.stateVec.imag;
3629 
3630  qreal facRe1 = fac1.real;
3631  qreal facIm1 = fac1.imag;
3632  qreal facRe2 = fac2.real;
3633  qreal facIm2 = fac2.imag;
3634  qreal facReOut = facOut.real;
3635  qreal facImOut = facOut.imag;
3636 
3637  qreal re1,im1, re2,im2, reOut,imOut;
3638  long long int index;
3639 
3640 # ifdef _OPENMP
3641 # pragma omp parallel \
3642  shared (vecRe1,vecIm1, vecRe2,vecIm2, vecReOut,vecImOut, facRe1,facIm1,facRe2,facIm2, numAmps) \
3643  private (index, re1,im1, re2,im2, reOut,imOut)
3644 # endif
3645  {
3646 # ifdef _OPENMP
3647 # pragma omp for schedule (static)
3648 # endif
3649  for (index=0LL; index<numAmps; index++) {
3650  re1 = vecRe1[index]; im1 = vecIm1[index];
3651  re2 = vecRe2[index]; im2 = vecIm2[index];
3652  reOut = vecReOut[index];
3653  imOut = vecImOut[index];
3654 
3655  vecReOut[index] = (facReOut*reOut - facImOut*imOut) + (facRe1*re1 - facIm1*im1) + (facRe2*re2 - facIm2*im2);
3656  vecImOut[index] = (facReOut*imOut + facImOut*reOut) + (facRe1*im1 + facIm1*re1) + (facRe2*im2 + facIm2*re2);
3657  }
3658  }
3659 }
3660 
3662 
3663  // each node/chunk modifies only its values in an embarrassingly parallelisable way
3664  long long int numAmps = qureg.numAmpsPerChunk;
3665 
3666  qreal* stateRe = qureg.stateVec.real;
3667  qreal* stateIm = qureg.stateVec.imag;
3668  qreal* opRe = op.real;
3669  qreal* opIm = op.imag;
3670 
3671  qreal a,b,c,d;
3672  long long int index;
3673 
3674 # ifdef _OPENMP
3675 # pragma omp parallel \
3676  shared (stateRe,stateIm, opRe,opIm, numAmps) \
3677  private (index, a,b,c,d)
3678 # endif
3679  {
3680 # ifdef _OPENMP
3681 # pragma omp for schedule (static)
3682 # endif
3683  for (index=0LL; index<numAmps; index++) {
3684  a = stateRe[index];
3685  b = stateIm[index];
3686  c = opRe[index];
3687  d = opIm[index];
3688 
3689  // (a + b i)(c + d i) = (a c - b d) + i (a d + b c)
3690  stateRe[index] = a*c - b*d;
3691  stateIm[index] = a*d + b*c;
3692  }
3693  }
3694 }
3695 
3697 
3698  /* ALL values of op are pre-loaded into qureg.pairStateVec (on every node).
3699  * Furthermore, since it's guaranteed each node contains an integer number of
3700  * columns of qureg (because op upper-limits the number of nodes; 1 per element),
3701  * we know the iteration below begins at the 'top' of a column, and there is
3702  * no offset for op (pairStateVec)
3703  */
3704 
3705  long long int numAmps = qureg.numAmpsPerChunk;
3706  int opDim = (1 << op.numQubits);
3707 
3708  qreal* stateRe = qureg.stateVec.real;
3709  qreal* stateIm = qureg.stateVec.imag;
3710  qreal* opRe = qureg.pairStateVec.real;
3711  qreal* opIm = qureg.pairStateVec.imag;
3712 
3713  qreal a,b,c,d;
3714  long long int index;
3715 
3716 # ifdef _OPENMP
3717 # pragma omp parallel \
3718  shared (stateRe,stateIm, opRe,opIm, numAmps,opDim) \
3719  private (index, a,b,c,d)
3720 # endif
3721  {
3722 # ifdef _OPENMP
3723 # pragma omp for schedule (static)
3724 # endif
3725  for (index=0LL; index<numAmps; index++) {
3726  a = stateRe[index];
3727  b = stateIm[index];
3728  c = opRe[index % opDim];
3729  d = opIm[index % opDim];
3730 
3731  // (a + b i)(c + d i) = (a c - b d) + i (a d + b c)
3732  stateRe[index] = a*c - b*d;
3733  stateIm[index] = a*d + b*c;
3734  }
3735  }
3736 }
3737 
3739 
      // NOTE(review): the signature line (3738) is absent from this listing; per the
      // cross-reference index this is the body of
      //   Complex statevec_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
      //
      // Accumulates, over the amplitudes held in this chunk only, the sum of
      // |amplitude|^2 * (operator element), and returns it as a Complex.
      // op.real/op.imag are indexed with the same local index as the state arrays -
      // presumably op is distributed across nodes identically to qureg; confirm.
      // NOTE(review): combining the per-chunk results is not done here - presumably
      // the caller does it; confirm.

      // running real and imaginary parts of the local sum (OpenMP reduction targets)
3740  qreal expecRe = 0;
3741  qreal expecIm = 0;
3742 
3743  long long int index;
3744  long long int numAmps = qureg.numAmpsPerChunk;
3745  qreal *stateReal = qureg.stateVec.real;
3746  qreal *stateImag = qureg.stateVec.imag;
3747  qreal *opReal = op.real;
3748  qreal *opImag = op.imag;
3749 
      // per-iteration scratch, private to each thread
3750  qreal vecRe,vecIm,vecAbs, opRe, opIm;
3751 
      // each thread sums its share of the loop into private copies of
      // expecRe/expecIm, which the reduction clause adds together afterwards
3752 # ifdef _OPENMP
3753 # pragma omp parallel \
3754  shared (stateReal, stateImag, opReal, opImag, numAmps) \
3755  private (index, vecRe,vecIm,vecAbs, opRe,opIm) \
3756  reduction ( +:expecRe, expecIm )
3757 # endif
3758  {
3759 # ifdef _OPENMP
3760 # pragma omp for schedule (static)
3761 # endif
3762  for (index=0; index < numAmps; index++) {
3763  vecRe = stateReal[index];
3764  vecIm = stateImag[index];
3765  opRe = opReal[index];
3766  opIm = opImag[index];
3767 
3768  // abs(vec)^2 op  (vecAbs holds the squared magnitude, which is real)
3769  vecAbs = vecRe*vecRe + vecIm*vecIm;
3770  expecRe += vecAbs*opRe;
3771  expecIm += vecAbs*opIm;
3772  }
3773  }
3774 
      // package the two sums; despite its name, innerProd holds the weighted sum above
3775  Complex innerProd;
3776  innerProd.real = expecRe;
3777  innerProd.imag = expecIm;
3778  return innerProd;
3779 }
3780 
3782 
      // NOTE(review): the signature line (3781) is absent from this listing; per the
      // cross-reference index this is the body of
      //   Complex densmatr_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
      //
      // Sums (diagonal element of qureg) * (operator element) over the diagonal
      // elements stored in this chunk, and returns the complex total.
3783  /* since for every 1 element in \p op, there exists a column in \p qureg,
3784  * we know that the elements in \p op live on the same node as the
3785  * corresponding diagonal elements of \p qureg. This means, the problem is
3786  * embarrassingly parallelisable, and the code below works for both
3787  * serial and distributed modes.
3788  */
3789 
3790  // computes first local index containing a diagonal element
      // (consecutive diagonal elements are diagSpacing = 2^n + 1 amplitudes apart)
3791  long long int diagSpacing = 1LL + (1LL << qureg.numQubitsRepresented);
      // chunk 0 starts on a diagonal element; for later chunks this counts the
      // diagonal positions lying before this chunk's first amplitude
3792  long long int numPrevDiags = (qureg.chunkId>0)? 1+(qureg.chunkId*qureg.numAmpsPerChunk)/diagSpacing : 0;
3793  long long int globalIndNextDiag = diagSpacing * numPrevDiags;
      // NOTE(review): the modulo yields a chunk-local index only if globalIndNextDiag
      // actually falls inside this chunk - presumably guaranteed by the layout
      // described in the block comment above; confirm.
3794  long long int localIndNextDiag = globalIndNextDiag % qureg.numAmpsPerChunk;
3795  long long int numAmps = qureg.numAmpsPerChunk;
3796 
3797  qreal* stateReal = qureg.stateVec.real;
3798  qreal* stateImag = qureg.stateVec.imag;
3799  qreal* opReal = op.real;
3800  qreal* opImag = op.imag;
3801 
      // running real and imaginary parts of the local sum (OpenMP reduction targets)
3802  qreal expecRe = 0;
3803  qreal expecIm = 0;
3804 
3805  long long int stateInd;
3806  long long int opInd;
3807  qreal matRe, matIm, opRe, opIm;
3808 
3809  // visits every diagonal element with global index (2^n + 1)i for i in [0, 2^n-1]
      // (only those whose local index lies within this chunk are reached by the loop)
3810 
3811 # ifdef _OPENMP
3812 # pragma omp parallel \
3813  shared (stateReal,stateImag, opReal,opImag, localIndNextDiag,diagSpacing,numAmps) \
3814  private (stateInd,opInd, matRe,matIm, opRe,opIm) \
3815  reduction ( +:expecRe, expecIm )
3816 # endif
3817  {
3818 # ifdef _OPENMP
3819 # pragma omp for schedule (static)
3820 # endif
3821  for (stateInd=localIndNextDiag; stateInd < numAmps; stateInd += diagSpacing) {
3822 
3823  matRe = stateReal[stateInd];
3824  matIm = stateImag[stateInd];
      // ordinal of this diagonal element within the chunk (0 for the first one),
      // used directly as the index into op.real/op.imag
3825  opInd = (stateInd - localIndNextDiag) / diagSpacing;
3826  opRe = opReal[opInd];
3827  opIm = opImag[opInd];
3828 
3829  // (matRe + matIm i)(opRe + opIm i) =
3830  // (matRe opRe - matIm opIm) + i (matRe opIm + matIm opRe)
3831  expecRe += matRe * opRe - matIm * opIm;
3832  expecIm += matRe * opIm + matIm * opRe;
3833  }
3834  }
3835 
3836  Complex expecVal;
3837  expecVal.real = expecRe;
3838  expecVal.imag = expecIm;
3839  return expecVal;
3840 }
3841 
3842 void agnostic_setDiagonalOpElems(DiagonalOp op, long long int startInd, qreal* real, qreal* imag, long long int numElems) {
3843 
3844  // local start/end indices of the given amplitudes, assuming they fit in this chunk
3845  // these may be negative or above qureg.numAmpsPerChunk
3846  long long int localStartInd = startInd - op.chunkId*op.numElemsPerChunk;
3847  long long int localEndInd = localStartInd + numElems; // exclusive
3848 
3849  // add this to a local index to get corresponding elem in reals & imags
3850  long long int offset = op.chunkId*op.numElemsPerChunk - startInd;
3851 
3852  // restrict these indices to fit into this chunk
3853  if (localStartInd < 0)
3854  localStartInd = 0;
3855  if (localEndInd > op.numElemsPerChunk)
3856  localEndInd = op.numElemsPerChunk;
3857  // they may now be out of order = no iterations
3858 
3859  // unpacking OpenMP vars
3860  long long int index;
3861  qreal* vecRe = op.real;
3862  qreal* vecIm = op.imag;
3863 
3864 # ifdef _OPENMP
3865 # pragma omp parallel \
3866  default (none) \
3867  shared (localStartInd,localEndInd, vecRe,vecIm, real,imag, offset) \
3868  private (index)
3869 # endif
3870  {
3871 # ifdef _OPENMP
3872 # pragma omp for schedule (static)
3873 # endif
3874  // iterate these local inds - this might involve no iterations
3875  for (index=localStartInd; index < localEndInd; index++) {
3876  vecRe[index] = real[index + offset];
3877  vecIm[index] = imag[index + offset];
3878  }
3879  }
3880 }
int qsortComp(const void *a, const void *b)
Definition: QuEST_cpu.c:1842
void copyStateFromGPU(Qureg qureg)
In GPU mode, this copies the state-vector (or density matrix) from GPU memory (qureg....
Definition: QuEST_cpu.c:39
qreal real[4][4]
Definition: QuEST.h:127
void syncQuESTEnv(QuESTEnv env)
Guarantees that all code up to the given point has been executed on all nodes (if running in distribu...
void statevec_controlledNotLocal(Qureg qureg, int controlQubit, int targetQubit)
Definition: QuEST_cpu.c:2584
void statevec_controlledPhaseShift(Qureg qureg, int idQubit1, int idQubit2, qreal angle)
Definition: QuEST_cpu.c:3019
void statevec_pauliYLocal(Qureg qureg, int targetQubit, int conjFac)
Definition: QuEST_cpu.c:2682
qreal densmatr_calcHilbertSchmidtDistanceSquaredLocal(Qureg a, Qureg b)
computes Tr((a-b) conjTrans(a-b)) = sum of squared abs values of the elements of (a-b)
Definition: QuEST_cpu.c:923
int rank
Definition: QuEST.h:244
int numChunks
The number of nodes between which the elements of this operator are split.
Definition: QuEST.h:185
void agnostic_setDiagonalOpElems(DiagonalOp op, long long int startInd, qreal *real, qreal *imag, long long int numElems)
Definition: QuEST_cpu.c:3842
void densmatr_initClassicalState(Qureg qureg, long long int stateInd)
Definition: QuEST_cpu.c:1115
ComplexArray pairStateVec
Temporary storage for a chunk of the state vector received from another process in the MPI version.
Definition: QuEST.h:224
void statevec_initDebugState(Qureg qureg)
Initialise the state vector of probability amplitudes to an (unphysical) state with each component of...
Definition: QuEST_cpu.c:1591
qreal statevec_findProbabilityOfZeroDistributed(Qureg qureg)
Measure the probability of a specified qubit being in the zero state across all amplitudes held in th...
Definition: QuEST_cpu.c:3262
void densmatr_mixDepolarisingDistributed(Qureg qureg, int targetQubit, qreal depolLevel)
Definition: QuEST_cpu.c:224
void densmatr_mixTwoQubitDepolarisingQ1LocalQ2DistributedPart3(Qureg qureg, int targetQubit, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:632
int numChunks
Number of chunks the state vector is broken up into – the number of MPI processes used.
Definition: QuEST.h:219
void statevec_swapQubitAmpsLocal(Qureg qureg, int qb1, int qb2)
It is ensured that all amplitudes needing to be swapped are on this node.
Definition: QuEST_cpu.c:3536
int getBitMaskParity(long long int mask)
Definition: QuEST_cpu.c:3100
void statevec_multiControlledUnitaryDistributed(Qureg qureg, int targetQubit, long long int ctrlQubitsMask, long long int ctrlFlipMask, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Apply a unitary operation to a single qubit in the state vector of probability amplitudes,...
Definition: QuEST_cpu.c:2447
void statevec_controlledUnitaryDistributed(Qureg qureg, int controlQubit, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2381
void statevec_collapseToKnownProbOutcomeLocal(Qureg qureg, int measureQubit, int outcome, qreal totalProbability)
Update the state vector to be consistent with measuring measureQubit=0 if outcome=0 and measureQubit=...
Definition: QuEST_cpu.c:3380
void statevec_cloneQureg(Qureg targetQureg, Qureg copyQureg)
Definition: QuEST_cpu.c:1506
int chunkId
The position of the chunk of the operator held by this process in the full operator.
Definition: QuEST.h:187
void statevec_setAmps(Qureg qureg, long long int startInd, qreal *reals, qreal *imags, long long int numAmps)
Definition: QuEST_cpu.c:1237
void statevec_compactUnitaryLocal(Qureg qureg, int targetQubit, Complex alpha, Complex beta)
Definition: QuEST_cpu.c:1688
qreal densmatr_calcPurityLocal(Qureg qureg)
Definition: QuEST_cpu.c:861
Complex statevec_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:3738
Represents a 4x4 matrix of complex numbers.
Definition: QuEST.h:125
Information about the environment the program is running in.
Definition: QuEST.h:242
void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int *targs, int numTargs, ComplexMatrixN u)
Definition: QuEST_cpu.c:1846
void statevec_initBlankState(Qureg qureg)
Definition: QuEST_cpu.c:1398
void statevec_multiControlledTwoQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u)
Definition: QuEST_cpu.c:1747
Represents a general 2^N by 2^N matrix of complex numbers.
Definition: QuEST.h:136
void statevec_initClassicalState(Qureg qureg, long long int stateInd)
Definition: QuEST_cpu.c:1470
void statevec_pauliXDistributed(Qureg qureg, ComplexArray stateVecIn, ComplexArray stateVecOut)
Rotate a single qubit by {{0,1},{1,0}}.
Definition: QuEST_cpu.c:2556
void densmatr_mixDephasing(Qureg qureg, int targetQubit, qreal dephase)
Definition: QuEST_cpu.c:79
#define qreal
void agnostic_destroyDiagonalOp(DiagonalOp op)
Definition: QuEST_cpu.c:1357
__forceinline__ __device__ long long int flipBit(const long long int number, const int bitInd)
Definition: QuEST_gpu.cu:95
void statevec_multiControlledPhaseShift(Qureg qureg, int *controlQubits, int numControlQubits, qreal angle)
Definition: QuEST_cpu.c:3059
int numQubitsInStateVec
Number of qubits in the state-vector - this is double the number represented for mixed states.
Definition: QuEST.h:210
void statevec_collapseToOutcomeDistributedSetZero(Qureg qureg)
Set all amplitudes in one chunk to 0.
Definition: QuEST_cpu.c:3501
int chunkId
The position of the chunk of the state vector held by this process in the full state vector.
Definition: QuEST.h:217
qreal imag[2][2]
Definition: QuEST.h:117
__forceinline__ __device__ long long int insertZeroBit(const long long int number, const int index)
Definition: QuEST_gpu.cu:99
qreal * imag
The imaginary values of the 2^numQubits complex elements.
Definition: QuEST.h:191
void statevec_controlledPhaseFlip(Qureg qureg, int idQubit1, int idQubit2)
Definition: QuEST_cpu.c:3300
long long int numAmpsPerChunk
Number of probability amplitudes held in stateVec by this process. In the non-MPI version,...
Definition: QuEST.h:213
void densmatr_mixTwoQubitDepolarisingLocal(Qureg qureg, int qubit1, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:387
void statevec_initZeroState(Qureg qureg)
Definition: QuEST_cpu.c:1428
void statevec_initPlusState(Qureg qureg)
Definition: QuEST_cpu.c:1438
void statevec_createQureg(Qureg *qureg, int numQubits, QuESTEnv env)
Definition: QuEST_cpu.c:1279
void statevec_controlledPauliYLocal(Qureg qureg, int controlQubit, int targetQubit, int conjFac)
Definition: QuEST_cpu.c:2776
void densmatr_oneQubitDegradeOffDiagonal(Qureg qureg, int targetQubit, qreal retain)
Definition: QuEST_cpu.c:48
void densmatr_mixDensityMatrix(Qureg combineQureg, qreal otherProb, Qureg otherQureg)
Definition: QuEST_cpu.c:890
void copyStateToGPU(Qureg qureg)
In GPU mode, this copies the state-vector (or density matrix) from RAM (qureg.stateVec) to VRAM / GPU...
Definition: QuEST_cpu.c:36
void alternateNormZeroingSomeAmpBlocks(Qureg qureg, qreal norm, int normFirst, long long int startAmpInd, long long int numAmps, long long int blockSize)
Definition: QuEST_cpu.c:754
int numRanks
Definition: QuEST.h:245
void statevec_compactUnitaryDistributed(Qureg qureg, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2001
qreal imag[4][4]
Definition: QuEST.h:128
int numQubits
The number of qubits this operator can act on (informing its size)
Definition: QuEST.h:181
void statevec_multiControlledUnitaryLocal(Qureg qureg, int targetQubit, long long int ctrlQubitsMask, long long int ctrlFlipMask, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2173
long long int getQubitBitMask(int *qubits, int numQubits)
Definition: QuEST_common.c:44
void normaliseSomeAmps(Qureg qureg, qreal norm, long long int startInd, long long int numAmps)
Definition: QuEST_cpu.c:744
Represents a diagonal complex operator on the full Hilbert state of a Qureg.
Definition: QuEST.h:178
void statevec_pauliYDistributed(Qureg qureg, ComplexArray stateVecIn, ComplexArray stateVecOut, int updateUpper, int conjFac)
Rotate a single qubit by +-{{0,-i},{i,0}}.
Definition: QuEST_cpu.c:2739
void statevec_getEnvironmentString(QuESTEnv env, Qureg qureg, char str[200])
Definition: QuEST_cpu.c:1390
void densmatr_mixDampingDistributed(Qureg qureg, int targetQubit, qreal damping)
Definition: QuEST_cpu.c:300
Complex densmatr_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:3781
qreal ** real
Definition: QuEST.h:139
void statevec_pauliXLocal(Qureg qureg, int targetQubit)
Definition: QuEST_cpu.c:2498
void agnostic_syncDiagonalOp(DiagonalOp op)
Definition: QuEST_cpu.c:1362
void densmatr_initPureStateLocal(Qureg targetQureg, Qureg copyQureg)
Definition: QuEST_cpu.c:1184
__forceinline__ __device__ int extractBit(const int locationOfBitFromRight, const long long int theEncodedNumber)
Definition: QuEST_gpu.cu:82
void densmatr_mixDampingLocal(Qureg qureg, int targetQubit, qreal damping)
Definition: QuEST_cpu.c:174
Represents a system of qubits.
Definition: QuEST.h:203
qreal densmatr_calcInnerProductLocal(Qureg a, Qureg b)
computes Tr(conjTrans(a) b) = sum of (a_ij^* b_ij)
Definition: QuEST_cpu.c:958
__forceinline__ __device__ long long int insertTwoZeroBits(const long long int number, const int bit1, const int bit2)
Definition: QuEST_gpu.cu:106
qreal ** imag
Definition: QuEST.h:140
void statevec_unitaryDistributed(Qureg qureg, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Apply a unitary operation to a single qubit given a subset of the state vector with upper and lower b...
Definition: QuEST_cpu.c:2056
void statevec_controlledCompactUnitaryDistributed(Qureg qureg, int controlQubit, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2319
int statevec_compareStates(Qureg mq1, Qureg mq2, qreal precision)
Definition: QuEST_cpu.c:1675
void statevec_multiControlledPhaseFlip(Qureg qureg, int *controlQubits, int numControlQubits)
Definition: QuEST_cpu.c:3331
ComplexArray stateVec
Computational state amplitudes - a subset thereof in the MPI version.
Definition: QuEST.h:222
qreal real[2][2]
Definition: QuEST.h:116
void densmatr_mixDepolarisingLocal(Qureg qureg, int targetQubit, qreal depolLevel)
Definition: QuEST_cpu.c:125
void statevec_collapseToKnownProbOutcomeDistributedRenorm(Qureg qureg, int measureQubit, qreal totalProbability)
Renormalise parts of the state vector where measureQubit=0 or 1, based on the total probability of th...
Definition: QuEST_cpu.c:3462
long long int numElemsPerChunk
The number of the 2^numQubits amplitudes stored on each distributed node.
Definition: QuEST.h:183
int isDensityMatrix
Whether this instance is a density-state representation.
Definition: QuEST.h:206
void statevec_hadamardLocal(Qureg qureg, int targetQubit)
Definition: QuEST_cpu.c:2872
int numQubits
Definition: QuEST.h:138
void densmatr_applyDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:3696
void statevec_controlledUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2241
static int isOddParity(const long long int number, const int qb1, const int qb2)
void densmatr_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outcome, qreal totalStateProb)
Renorms (/prob) every | * outcome * >< * outcome * | state, setting all others to zero.
Definition: QuEST_cpu.c:785
void statevec_destroyQureg(Qureg qureg, QuESTEnv env)
Definition: QuEST_cpu.c:1317
void densmatr_mixTwoQubitDepolarisingLocalPart1(Qureg qureg, int qubit1, int qubit2, qreal delta)
Definition: QuEST_cpu.c:488
void statevec_multiRotateZ(Qureg qureg, long long int mask, qreal angle)
Definition: QuEST_cpu.c:3109
void statevec_controlledNotDistributed(Qureg qureg, int controlQubit, ComplexArray stateVecIn, ComplexArray stateVecOut)
Rotate a single qubit by {{0,1},{1,0}}.
Definition: QuEST_cpu.c:2646
int numQubitsRepresented
The number of qubits represented in either the state-vector or density matrix.
Definition: QuEST.h:208
long long int numAmpsTotal
Total number of amplitudes, which are possibly distributed among machines.
Definition: QuEST.h:215
qreal * real
The real values of the 2^numQubits complex elements.
Definition: QuEST.h:189
qreal real
Definition: QuEST.h:105
void statevec_swapQubitAmpsDistributed(Qureg qureg, int pairRank, int qb1, int qb2)
qureg.pairStateVec contains the entire set of amplitudes of the paired node which includes the set of...
Definition: QuEST_cpu.c:3579
int statevec_initStateFromSingleFile(Qureg *qureg, char filename[200], QuESTEnv env)
Definition: QuEST_cpu.c:1625
qreal imag
Definition: QuEST.h:106
void densmatr_mixTwoQubitDepolarisingDistributed(Qureg qureg, int targetQubit, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:541
void statevec_unitaryLocal(Qureg qureg, int targetQubit, ComplexMatrix2 u)
Definition: QuEST_cpu.c:1932
void densmatr_mixTwoQubitDephasing(Qureg qureg, int qubit1, int qubit2, qreal dephase)
Definition: QuEST_cpu.c:84
qreal densmatr_findProbabilityOfZeroLocal(Qureg qureg, int measureQubit)
Definition: QuEST_cpu.c:3151
Represents one complex number.
Definition: QuEST.h:103
void statevec_hadamardDistributed(Qureg qureg, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut, int updateUpper)
Rotate a single qubit by {{1,1},{1,-1}}/sqrt2.
Definition: QuEST_cpu.c:2932
void statevec_reportStateToScreen(Qureg qureg, QuESTEnv env, int reportRank)
Definition: QuEST_cpu.c:1366
void statevec_setWeightedQureg(Complex fac1, Qureg qureg1, Complex fac2, Qureg qureg2, Complex facOut, Qureg out)
Definition: QuEST_cpu.c:3619
void densmatr_initPlusState(Qureg qureg)
Definition: QuEST_cpu.c:1154
void statevec_initStateOfSingleQubit(Qureg *qureg, int qubitId, int outcome)
Initialise the state vector of probability amplitudes such that one qubit is set to 'outcome' and all...
Definition: QuEST_cpu.c:1545
void statevec_controlledPauliYDistributed(Qureg qureg, int controlQubit, ComplexArray stateVecIn, ComplexArray stateVecOut, int conjFac)
Definition: QuEST_cpu.c:2830
void statevec_applyDiagonalOp(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:3661
void statevec_controlledCompactUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit, Complex alpha, Complex beta)
Definition: QuEST_cpu.c:2101
Complex statevec_calcInnerProductLocal(Qureg bra, Qureg ket)
Definition: QuEST_cpu.c:1071
void statevec_phaseShiftByTerm(Qureg qureg, int targetQubit, Complex term)
Definition: QuEST_cpu.c:2978
Represents a 2x2 matrix of complex numbers.
Definition: QuEST.h:114
void zeroSomeAmps(Qureg qureg, long long int startInd, long long int numAmps)
Definition: QuEST_cpu.c:734
DiagonalOp agnostic_createDiagonalOp(int numQubits, QuESTEnv env)
Definition: QuEST_cpu.c:1335
qreal densmatr_calcFidelityLocal(Qureg qureg, Qureg pureState)
computes a few dens-columns-worth of (vec^*T) dens * vec
Definition: QuEST_cpu.c:990
qreal statevec_findProbabilityOfZeroLocal(Qureg qureg, int measureQubit)
Measure the total probability of a specified qubit being in the zero state across all amplitudes in t...
Definition: QuEST_cpu.c:3206