50 long long int innerMask = 1LL << targetQubit;
53 long long int thisTask;
54 long long int thisPattern;
55 long long int totMask = innerMask|outerMask;
58 # pragma omp parallel \
60 shared (innerMask,outerMask,totMask,qureg,retain,numTasks, targetQubit) \
61 private (thisTask,thisPattern)
65 # pragma omp for schedule (static)
67 for (thisTask=0; thisTask<numTasks; thisTask++){
69 if ((thisPattern==innerMask) || (thisPattern==outerMask)){
80 qreal retain=1-dephase;
85 qreal retain=1-dephase;
88 long long int innerMaskQubit1 = 1LL << qubit1;
90 long long int innerMaskQubit2 = 1LL << qubit2;
92 long long int totMaskQubit1 = innerMaskQubit1|outerMaskQubit1;
93 long long int totMaskQubit2 = innerMaskQubit2|outerMaskQubit2;
95 long long int thisTask;
96 long long int thisPatternQubit1, thisPatternQubit2;
99 # pragma omp parallel \
101 shared (innerMaskQubit1,outerMaskQubit1,totMaskQubit1,innerMaskQubit2,outerMaskQubit2, \
102 totMaskQubit2,qureg,retain,numTasks) \
103 private (thisTask,thisPatternQubit1,thisPatternQubit2)
107 # pragma omp for schedule (static)
109 for (thisTask=0; thisTask<numTasks; thisTask++){
114 if ( (thisPatternQubit1==innerMaskQubit1) || (thisPatternQubit1==outerMaskQubit1) ||
115 (thisPatternQubit2==innerMaskQubit2) || (thisPatternQubit2==outerMaskQubit2) ){
126 qreal retain=1-depolLevel;
129 long long int innerMask = 1LL << targetQubit;
131 long long int totMask = innerMask|outerMask;
133 long long int thisTask;
134 long long int partner;
135 long long int thisPattern;
137 qreal realAv, imagAv;
140 # pragma omp parallel \
142 shared (innerMask,outerMask,totMask,qureg,retain,depolLevel,numTasks) \
143 private (thisTask,partner,thisPattern,realAv,imagAv)
147 # pragma omp for schedule (static)
149 for (thisTask=0; thisTask<numTasks; thisTask++){
151 if ((thisPattern==innerMask) || (thisPattern==outerMask)){
157 if ((thisTask&totMask)==0){
159 partner = thisTask | totMask;
160 realAv = (qureg.
stateVec.real[thisTask] + qureg.
stateVec.real[partner]) /2 ;
161 imagAv = (qureg.
stateVec.imag[thisTask] + qureg.
stateVec.imag[partner]) /2 ;
163 qureg.
stateVec.real[thisTask] = retain*qureg.
stateVec.real[thisTask] + depolLevel*realAv;
164 qureg.
stateVec.imag[thisTask] = retain*qureg.
stateVec.imag[thisTask] + depolLevel*imagAv;
166 qureg.
stateVec.real[partner] = retain*qureg.
stateVec.real[partner] + depolLevel*realAv;
167 qureg.
stateVec.imag[partner] = retain*qureg.
stateVec.imag[partner] + depolLevel*imagAv;
175 qreal retain=1-damping;
176 qreal dephase=sqrt(retain);
179 long long int innerMask = 1LL << targetQubit;
181 long long int totMask = innerMask|outerMask;
183 long long int thisTask;
184 long long int partner;
185 long long int thisPattern;
190 # pragma omp parallel \
192 shared (innerMask,outerMask,totMask,qureg,retain,damping,dephase,numTasks) \
193 private (thisTask,partner,thisPattern)
197 # pragma omp for schedule (static)
199 for (thisTask=0; thisTask<numTasks; thisTask++){
201 if ((thisPattern==innerMask) || (thisPattern==outerMask)){
207 if ((thisTask&totMask)==0){
209 partner = thisTask | totMask;
232 long long int sizeInnerBlock, sizeInnerHalfBlock;
233 long long int sizeOuterColumn, sizeOuterHalfColumn;
234 long long int thisInnerBlock,
237 thisIndexInOuterColumn,
238 thisIndexInInnerBlock;
241 long long int thisTask;
245 sizeInnerHalfBlock = 1LL << targetQubit;
246 sizeInnerBlock = 2LL * sizeInnerHalfBlock;
248 sizeOuterHalfColumn = sizeOuterColumn >> 1;
251 # pragma omp parallel \
253 shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
254 qureg,depolLevel,numTasks,targetQubit) \
255 private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
256 thisIndexInInnerBlock,outerBit)
260 # pragma omp for schedule (static)
267 for (thisTask=0; thisTask<numTasks; thisTask++) {
270 thisOuterColumn = thisTask / sizeOuterHalfColumn;
271 thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1);
272 thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
274 thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1);
275 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
276 + thisIndexInInnerBlock;
281 thisIndex += outerBit*(sizeInnerHalfBlock);
291 qureg.
stateVec.real[thisIndex] = (1-depolLevel)*qureg.
stateVec.real[thisIndex] +
294 qureg.
stateVec.imag[thisIndex] = (1-depolLevel)*qureg.
stateVec.imag[thisIndex] +
301 qreal retain=1-damping;
302 qreal dephase=sqrt(1-damping);
309 long long int sizeInnerBlock, sizeInnerHalfBlock;
310 long long int sizeOuterColumn, sizeOuterHalfColumn;
311 long long int thisInnerBlock,
314 thisIndexInOuterColumn,
315 thisIndexInInnerBlock;
319 long long int thisTask;
323 sizeInnerHalfBlock = 1LL << targetQubit;
324 sizeInnerBlock = 2LL * sizeInnerHalfBlock;
326 sizeOuterHalfColumn = sizeOuterColumn >> 1;
329 # pragma omp parallel \
331 shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
332 qureg,damping, retain, dephase, numTasks,targetQubit) \
333 private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
334 thisIndexInInnerBlock,outerBit, stateBit)
338 # pragma omp for schedule (static)
345 for (thisTask=0; thisTask<numTasks; thisTask++) {
348 thisOuterColumn = thisTask / sizeOuterHalfColumn;
349 thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1);
350 thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
352 thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1);
353 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
354 + thisIndexInInnerBlock;
359 thisIndex += outerBit*(sizeInnerHalfBlock);
389 long long int innerMaskQubit1 = 1LL << qubit1;
391 long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
392 long long int innerMaskQubit2 = 1LL << qubit2;
394 long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
396 long long int thisTask;
397 long long int partner;
398 long long int thisPatternQubit1, thisPatternQubit2;
400 qreal real00, imag00;
403 # pragma omp parallel \
405 shared (totMaskQubit1,totMaskQubit2,qureg,delta,gamma,numTasks) \
406 private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
410 # pragma omp for schedule (static)
413 for (thisTask=0; thisTask<numTasks; thisTask++){
416 if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
417 || (thisPatternQubit2==totMaskQubit2))){
419 partner = thisTask | totMaskQubit1;
420 real00 = qureg.
stateVec.real[thisTask];
421 imag00 = qureg.
stateVec.imag[thisTask];
424 + delta*qureg.
stateVec.real[partner];
426 + delta*qureg.
stateVec.imag[partner];
434 # pragma omp for schedule (static)
437 for (thisTask=0; thisTask<numTasks; thisTask++){
440 if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
441 || (thisPatternQubit1==totMaskQubit1))){
443 partner = thisTask | totMaskQubit2;
444 real00 = qureg.
stateVec.real[thisTask];
445 imag00 = qureg.
stateVec.imag[thisTask];
448 + delta*qureg.
stateVec.real[partner];
450 + delta*qureg.
stateVec.imag[partner];
459 # pragma omp for schedule (static)
462 for (thisTask=0; thisTask<numTasks; thisTask++){
465 if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
466 || (thisPatternQubit1==totMaskQubit1))){
468 partner = thisTask | totMaskQubit2;
469 partner = partner ^ totMaskQubit1;
470 real00 = qureg.
stateVec.real[thisTask];
471 imag00 = qureg.
stateVec.imag[thisTask];
474 + delta*qureg.
stateVec.real[partner]);
476 + delta*qureg.
stateVec.imag[partner]);
490 long long int innerMaskQubit1 = 1LL << qubit1;
492 long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
493 long long int innerMaskQubit2 = 1LL << qubit2;
495 long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
500 long long int thisTask;
501 long long int partner;
502 long long int thisPatternQubit1, thisPatternQubit2;
504 qreal real00, imag00;
507 # pragma omp parallel \
509 shared (totMaskQubit1,totMaskQubit2,qureg,delta,numTasks) \
510 private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
515 # pragma omp for schedule (static)
518 for (thisTask=0; thisTask<numTasks; thisTask ++){
521 if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
522 || (thisPatternQubit2==totMaskQubit2))){
524 partner = thisTask | totMaskQubit1;
525 real00 = qureg.
stateVec.real[thisTask];
526 imag00 = qureg.
stateVec.imag[thisTask];
529 + delta*qureg.
stateVec.real[partner];
531 + delta*qureg.
stateVec.imag[partner];
544 long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
545 long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
546 long long int sizeOuterColumn, sizeOuterQuarterColumn;
547 long long int thisInnerBlockQ2,
550 thisIndexInOuterColumn,
551 thisIndexInInnerBlockQ1,
552 thisIndexInInnerBlockQ2,
553 thisInnerBlockQ1InInnerBlockQ2;
554 int outerBitQ1, outerBitQ2;
556 long long int thisTask;
560 sizeInnerHalfBlockQ1 = 1LL << targetQubit;
561 sizeInnerHalfBlockQ2 = 1LL << qubit2;
562 sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
563 sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
564 sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
566 sizeOuterQuarterColumn = sizeOuterColumn >> 2;
569 # pragma omp parallel \
571 shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
572 sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma,numTasks,targetQubit,qubit2) \
573 private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
574 thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
575 thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
579 # pragma omp for schedule (static)
586 for (thisTask=0; thisTask<numTasks; thisTask++) {
589 thisOuterColumn = thisTask / sizeOuterQuarterColumn;
591 thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
592 thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
594 thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
595 thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
597 thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
600 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
601 + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
607 thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
613 thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
635 long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
636 long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
637 long long int sizeOuterColumn, sizeOuterQuarterColumn;
638 long long int thisInnerBlockQ2,
641 thisIndexInPairVector,
642 thisIndexInOuterColumn,
643 thisIndexInInnerBlockQ1,
644 thisIndexInInnerBlockQ2,
645 thisInnerBlockQ1InInnerBlockQ2;
646 int outerBitQ1, outerBitQ2;
648 long long int thisTask;
652 sizeInnerHalfBlockQ1 = 1LL << targetQubit;
653 sizeInnerHalfBlockQ2 = 1LL << qubit2;
654 sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
655 sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
656 sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
658 sizeOuterQuarterColumn = sizeOuterColumn >> 2;
662 # pragma omp parallel \
664 shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
665 sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma, numTasks,targetQubit,qubit2) \
666 private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
667 thisOuterColumn,thisIndex,thisIndexInPairVector,thisIndexInOuterColumn, \
668 thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
672 # pragma omp for schedule (static)
680 for (thisTask=0; thisTask<numTasks; thisTask++) {
683 thisOuterColumn = thisTask / sizeOuterQuarterColumn;
685 thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
686 thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
688 thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
689 thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
691 thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
694 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
695 + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
701 thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
705 thisIndexInPairVector = thisTask + (1-outerBitQ1)*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn -
706 outerBitQ1*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn;
712 thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
737 # pragma omp parallel for schedule (static)
739 for (i=startInd; i < startInd+numAmps; i++) {
747 # pragma omp parallel for schedule (static)
749 for (i=startInd; i < startInd+numAmps; i++) {
756 long long int startAmpInd,
long long int numAmps,
long long int blockSize
758 long long int numDubBlocks = numAmps / (2*blockSize);
759 long long int blockStartInd;
762 long long int dubBlockInd;
764 # pragma omp parallel for schedule (static) private (blockStartInd)
766 for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
767 blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
769 zeroSomeAmps( qureg, blockStartInd + blockSize, blockSize);
772 long long int dubBlockInd;
774 # pragma omp parallel for schedule (static) private (blockStartInd)
776 for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
777 blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
792 long long int innerBlockSize = (1LL << measureQubit);
798 long long int globalStartInd = qureg.
chunkId * locNumAmps;
799 int innerBit =
extractBit(measureQubit, globalStartInd);
803 if (locNumAmps <= outerBlockSize) {
806 if (outerBit != outcome)
810 if (locNumAmps <= innerBlockSize) {
813 if (innerBit != outcome)
822 qureg, totalStateProb, innerBit==outcome, 0, qureg.
numAmpsPerChunk, innerBlockSize);
826 long long int numOuterDoubleBlocks = locNumAmps / (2*outerBlockSize);
827 long long int firstBlockInd;
832 if (outerBit == outcome) {
834 for (
long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
835 firstBlockInd = outerDubBlockInd*2*outerBlockSize;
839 qureg, totalStateProb, innerBit==outcome,
840 firstBlockInd, outerBlockSize, innerBlockSize);
843 zeroSomeAmps(qureg, firstBlockInd + outerBlockSize, outerBlockSize);
848 for (
long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
849 firstBlockInd = outerDubBlockInd*2*outerBlockSize;
854 qureg, totalStateProb, innerBit==outcome,
855 firstBlockInd + outerBlockSize, outerBlockSize, innerBlockSize);
872 # pragma omp parallel \
873 shared (vecRe, vecIm, numAmps) \
875 reduction ( +:trace )
879 # pragma omp for schedule (static)
881 for (index=0LL; index<numAmps; index++) {
883 trace += vecRe[index]*vecRe[index] + vecIm[index]*vecIm[index];
903 # pragma omp parallel \
905 shared (combineVecRe,combineVecIm,otherVecRe,otherVecIm, otherProb, numAmps) \
910 # pragma omp for schedule (static)
912 for (index=0; index < numAmps; index++) {
913 combineVecRe[index] *= 1-otherProb;
914 combineVecIm[index] *= 1-otherProb;
916 combineVecRe[index] += otherProb * otherVecRe[index];
917 combineVecIm[index] += otherProb * otherVecIm[index];
937 # pragma omp parallel \
938 shared (aRe,aIm, bRe,bIm, numAmps) \
939 private (index,difRe,difIm) \
940 reduction ( +:trace )
944 # pragma omp for schedule (static)
946 for (index=0LL; index<numAmps; index++) {
948 difRe = aRe[index] - bRe[index];
949 difIm = aIm[index] - bIm[index];
950 trace += difRe*difRe + difIm*difIm;
971 # pragma omp parallel \
972 shared (aRe,aIm, bRe,bIm, numAmps) \
974 reduction ( +:trace )
978 # pragma omp for schedule (static)
980 for (index=0LL; index<numAmps; index++) {
981 trace += aRe[index]*bRe[index] + aIm[index]*bIm[index];
1021 qreal densElemRe, densElemIm;
1022 qreal prefacRe, prefacIm;
1023 qreal rowSumRe, rowSumIm;
1024 qreal vecElemRe, vecElemIm;
1027 qreal globalSumRe = 0;
1030 # pragma omp parallel \
1031 shared (vecRe,vecIm,densRe,densIm, dim,colsPerNode,startCol) \
1032 private (row,col, prefacRe,prefacIm, rowSumRe,rowSumIm, densElemRe,densElemIm, vecElemRe,vecElemIm) \
1033 reduction ( +:globalSumRe )
1037 # pragma omp for schedule (static)
1040 for (row=0; row < dim; row++) {
1043 prefacRe = vecRe[row];
1044 prefacIm = - vecIm[row];
1050 for (col=0; col < colsPerNode; col++) {
1053 densElemRe = densRe[row + dim*col];
1054 densElemIm = densIm[row + dim*col];
1057 vecElemRe = vecRe[startCol + col];
1058 vecElemIm = vecIm[startCol + col];
1060 rowSumRe += densElemRe*vecElemRe - densElemIm*vecElemIm;
1061 rowSumIm += densElemRe*vecElemIm + densElemIm*vecElemRe;
1064 globalSumRe += rowSumRe*prefacRe - rowSumIm*prefacIm;
1073 qreal innerProdReal = 0;
1074 qreal innerProdImag = 0;
1076 long long int index;
1083 qreal braRe, braIm, ketRe, ketIm;
1086 # pragma omp parallel \
1087 shared (braVecReal, braVecImag, ketVecReal, ketVecImag, numAmps) \
1088 private (index, braRe, braIm, ketRe, ketIm) \
1089 reduction ( +:innerProdReal, innerProdImag )
1093 # pragma omp for schedule (static)
1095 for (index=0; index < numAmps; index++) {
1096 braRe = braVecReal[index];
1097 braIm = braVecImag[index];
1098 ketRe = ketVecReal[index];
1099 ketIm = ketVecImag[index];
1102 innerProdReal += braRe*ketRe + braIm*ketIm;
1103 innerProdImag += braRe*ketIm - braIm*ketRe;
1108 innerProd.
real = innerProdReal;
1109 innerProd.
imag = innerProdImag;
1125 long long int index;
1127 # pragma omp parallel \
1129 shared (densityNumElems, densityReal, densityImag) \
1134 # pragma omp for schedule (static)
1136 for (index=0; index<densityNumElems; index++) {
1137 densityReal[index] = 0.0;
1138 densityImag[index] = 0.0;
1144 long long int densityInd = (densityDim + 1)*stateInd;
1147 if (qureg.
chunkId == densityInd / densityNumElems){
1148 densityReal[densityInd % densityNumElems] = 1.0;
1149 densityImag[densityInd % densityNumElems] = 0.0;
1164 long long int index;
1168 # pragma omp parallel \
1170 shared (chunkSize, densityReal, densityImag, probFactor) \
1175 # pragma omp for schedule (static)
1177 for (index=0; index<chunkSize; index++) {
1178 densityReal[index] = probFactor;
1179 densityImag[index] = 0.0;
1201 long long int col, row, index;
1204 qreal ketRe, ketIm, braRe, braIm;
1207 # pragma omp parallel \
1209 shared (colOffset, colsPerNode,rowsPerNode, vecRe,vecIm,densRe,densIm) \
1210 private (col,row, ketRe,ketIm,braRe,braIm, index)
1214 # pragma omp for schedule (static)
1217 for (col=0; col < colsPerNode; col++) {
1220 for (row=0; row < rowsPerNode; row++) {
1225 braRe = vecRe[col + colOffset];
1226 braIm = - vecIm[col + colOffset];
1229 index = row + col*rowsPerNode;
1230 densRe[index] = ketRe*braRe - ketIm*braIm;
1231 densIm[index] = ketRe*braIm + ketIm*braRe;
1244 long long int localEndInd = localStartInd + numAmps;
1250 if (localStartInd < 0)
1257 long long int index;
1262 # pragma omp parallel \
1264 shared (localStartInd,localEndInd, vecRe,vecIm, reals,imags, offset) \
1269 # pragma omp for schedule (static)
1272 for (index=localStartInd; index < localEndInd; index++) {
1273 vecRe[index] = reals[index + offset];
1274 vecIm[index] = imags[index + offset];
1281 long long int numAmps = 1LL << numQubits;
1282 long long int numAmpsPerRank = numAmps/env.
numRanks;
1284 if (numAmpsPerRank > SIZE_MAX) {
1285 printf(
"Could not allocate memory (cannot fit numAmps into size_t)!");
1286 exit (EXIT_FAILURE);
1289 size_t arrSize = (size_t) (numAmpsPerRank *
sizeof(*(qureg->
stateVec.real)));
1290 qureg->
stateVec.real = malloc(arrSize);
1291 qureg->
stateVec.imag = malloc(arrSize);
1298 && numAmpsPerRank ) {
1299 printf(
"Could not allocate memory!");
1300 exit (EXIT_FAILURE);
1304 && numAmpsPerRank ) {
1305 printf(
"Could not allocate memory!");
1306 exit (EXIT_FAILURE);
1350 printf(
"Could not allocate memory!\n");
1367 long long int index;
1370 for (rank=0; rank<qureg.
numChunks; rank++){
1373 printf(
"Reporting state from rank %d [\n", qureg.
chunkId);
1374 printf(
"real, imag\n");
1375 }
else if (rank==0) {
1376 printf(
"Reporting state [\n");
1377 printf(
"real, imag\n");
1382 printf(REAL_STRING_FORMAT
", " REAL_STRING_FORMAT
"\n", qureg.
stateVec.real[index], qureg.
stateVec.imag[index]);
1384 if (reportRank || rank==qureg.
numChunks-1) printf(
"]\n");
1388 }
else printf(
"Error: reportStateToScreen will not print output for systems of more than 5 qubits.\n");
1393 numThreads=omp_get_max_threads();
1400 long long int stateVecSize;
1401 long long int index;
1412 # pragma omp parallel \
1414 shared (stateVecSize, stateVecReal, stateVecImag) \
1419 # pragma omp for schedule (static)
1421 for (index=0; index<stateVecSize; index++) {
1422 stateVecReal[index] = 0.0;
1423 stateVecImag[index] = 0.0;
1440 long long int chunkSize, stateVecSize;
1441 long long int index;
1445 stateVecSize = chunkSize*qureg.
numChunks;
1446 qreal normFactor = 1.0/sqrt((
qreal)stateVecSize);
1454 # pragma omp parallel \
1456 shared (chunkSize, stateVecReal, stateVecImag, normFactor) \
1461 # pragma omp for schedule (static)
1463 for (index=0; index<chunkSize; index++) {
1464 stateVecReal[index] = normFactor;
1465 stateVecImag[index] = 0.0;
1472 long long int stateVecSize;
1473 long long int index;
1484 # pragma omp parallel \
1486 shared (stateVecSize, stateVecReal, stateVecImag) \
1491 # pragma omp for schedule (static)
1493 for (index=0; index<stateVecSize; index++) {
1494 stateVecReal[index] = 0.0;
1495 stateVecImag[index] = 0.0;
1500 if (qureg.
chunkId == stateInd/stateVecSize){
1501 stateVecReal[stateInd % stateVecSize] = 1.0;
1502 stateVecImag[stateInd % stateVecSize] = 0.0;
1509 long long int stateVecSize;
1510 long long int index;
1523 # pragma omp parallel \
1525 shared (stateVecSize, targetStateVecReal, targetStateVecImag, copyStateVecReal, copyStateVecImag) \
1530 # pragma omp for schedule (static)
1532 for (index=0; index<stateVecSize; index++) {
1533 targetStateVecReal[index] = copyStateVecReal[index];
1534 targetStateVecImag[index] = copyStateVecImag[index];
1547 long long int chunkSize, stateVecSize;
1548 long long int index;
1550 long long int chunkId=qureg->
chunkId;
1554 stateVecSize = chunkSize*qureg->
numChunks;
1555 qreal normFactor = 1.0/sqrt((
qreal)stateVecSize/2.0);
1563 # pragma omp parallel \
1565 shared (chunkSize, stateVecReal, stateVecImag, normFactor, qubitId, outcome, chunkId) \
1566 private (index, bit)
1570 # pragma omp for schedule (static)
1572 for (index=0; index<chunkSize; index++) {
1573 bit =
extractBit(qubitId, index+chunkId*chunkSize);
1575 stateVecReal[index] = normFactor;
1576 stateVecImag[index] = 0.0;
1578 stateVecReal[index] = 0.0;
1579 stateVecImag[index] = 0.0;
1593 long long int chunkSize;
1594 long long int index;
1595 long long int indexOffset;
1604 indexOffset = chunkSize * qureg.
chunkId;
1608 # pragma omp parallel \
1610 shared (chunkSize, stateVecReal, stateVecImag, indexOffset) \
1615 # pragma omp for schedule (static)
1617 for (index=0; index<chunkSize; index++) {
1618 stateVecReal[index] = ((indexOffset + index)*2.0)/10.0;
1619 stateVecImag[index] = ((indexOffset + index)*2.0+1.0)/10.0;
1626 long long int chunkSize, stateVecSize;
1627 long long int indexInChunk, totalIndex;
1630 stateVecSize = chunkSize*qureg->
numChunks;
1638 for (
int rank=0; rank<(qureg->
numChunks); rank++){
1640 fp = fopen(filename,
"r");
1646 indexInChunk = 0; totalIndex = 0;
1647 while (fgets(line,
sizeof(
char)*200, fp) != NULL && totalIndex<stateVecSize){
1649 int chunkId = (int) (totalIndex/chunkSize);
1652 sscanf(line,
"%f, %f", &(stateVecReal[indexInChunk]),
1653 &(stateVecImag[indexInChunk]));
1654 # elif QuEST_PREC==2
1655 sscanf(line,
"%lf, %lf", &(stateVecReal[indexInChunk]),
1656 &(stateVecImag[indexInChunk]));
1657 # elif QuEST_PREC==4
1658 sscanf(line,
"%Lf, %Lf", &(stateVecReal[indexInChunk]),
1659 &(stateVecImag[indexInChunk]));
1679 for (
long long int i=0; i<chunkSize; i++){
1681 if (diff>precision)
return 0;
1683 if (diff>precision)
return 0;
1690 long long int sizeBlock, sizeHalfBlock;
1691 long long int thisBlock,
1694 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
1695 long long int thisTask;
1699 sizeHalfBlock = 1LL << targetQubit;
1700 sizeBlock = 2LL * sizeHalfBlock;
1709 # pragma omp parallel \
1711 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, numTasks) \
1712 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
1716 # pragma omp for schedule (static)
1718 for (thisTask=0; thisTask<numTasks; thisTask++) {
1720 thisBlock = thisTask / sizeHalfBlock;
1721 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
1722 indexLo = indexUp + sizeHalfBlock;
1725 stateRealUp = stateVecReal[indexUp];
1726 stateImagUp = stateVecImag[indexUp];
1728 stateRealLo = stateVecReal[indexLo];
1729 stateImagLo = stateVecImag[indexLo];
1732 stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
1733 - betaReal*stateRealLo - betaImag*stateImagLo;
1734 stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
1735 - betaReal*stateImagLo + betaImag*stateRealLo;
1738 stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
1739 + alphaReal*stateRealLo + alphaImag*stateImagLo;
1740 stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
1741 + alphaReal*stateImagLo - alphaImag*stateRealLo;
1757 long long int thisTask;
1758 long long int thisGlobalInd00;
1759 long long int ind00, ind01, ind10, ind11;
1760 qreal re00, re01, re10, re11;
1761 qreal im00, im01, im10, im11;
1764 # pragma omp parallel \
1766 shared (reVec,imVec,globalIndStart,numTasks,ctrlMask,u,q2,q1) \
1767 private (thisTask, thisGlobalInd00, ind00,ind01,ind10,ind11, re00,re01,re10,re11, im00,im01,im10,im11)
1771 # pragma omp for schedule (static)
1773 for (thisTask=0; thisTask<numTasks; thisTask++) {
1779 thisGlobalInd00 = ind00 + globalIndStart;
1780 if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1789 re00 = reVec[ind00]; im00 = imVec[ind00];
1790 re01 = reVec[ind01]; im01 = imVec[ind01];
1791 re10 = reVec[ind10]; im10 = imVec[ind10];
1792 re11 = reVec[ind11]; im11 = imVec[ind11];
1796 u.
real[0][0]*re00 - u.
imag[0][0]*im00 +
1797 u.
real[0][1]*re01 - u.
imag[0][1]*im01 +
1798 u.
real[0][2]*re10 - u.
imag[0][2]*im10 +
1799 u.
real[0][3]*re11 - u.
imag[0][3]*im11;
1801 u.
imag[0][0]*re00 + u.
real[0][0]*im00 +
1802 u.
imag[0][1]*re01 + u.
real[0][1]*im01 +
1803 u.
imag[0][2]*re10 + u.
real[0][2]*im10 +
1804 u.
imag[0][3]*re11 + u.
real[0][3]*im11;
1807 u.
real[1][0]*re00 - u.
imag[1][0]*im00 +
1808 u.
real[1][1]*re01 - u.
imag[1][1]*im01 +
1809 u.
real[1][2]*re10 - u.
imag[1][2]*im10 +
1810 u.
real[1][3]*re11 - u.
imag[1][3]*im11;
1812 u.
imag[1][0]*re00 + u.
real[1][0]*im00 +
1813 u.
imag[1][1]*re01 + u.
real[1][1]*im01 +
1814 u.
imag[1][2]*re10 + u.
real[1][2]*im10 +
1815 u.
imag[1][3]*re11 + u.
real[1][3]*im11;
1818 u.
real[2][0]*re00 - u.
imag[2][0]*im00 +
1819 u.
real[2][1]*re01 - u.
imag[2][1]*im01 +
1820 u.
real[2][2]*re10 - u.
imag[2][2]*im10 +
1821 u.
real[2][3]*re11 - u.
imag[2][3]*im11;
1823 u.
imag[2][0]*re00 + u.
real[2][0]*im00 +
1824 u.
imag[2][1]*re01 + u.
real[2][1]*im01 +
1825 u.
imag[2][2]*re10 + u.
real[2][2]*im10 +
1826 u.
imag[2][3]*re11 + u.
real[2][3]*im11;
1829 u.
real[3][0]*re00 - u.
imag[3][0]*im00 +
1830 u.
real[3][1]*re01 - u.
imag[3][1]*im01 +
1831 u.
real[3][2]*re10 - u.
imag[3][2]*im10 +
1832 u.
real[3][3]*re11 - u.
imag[3][3]*im11;
1834 u.
imag[3][0]*re00 + u.
real[3][0]*im00 +
1835 u.
imag[3][1]*re01 + u.
real[3][1]*im01 +
1836 u.
imag[3][2]*re10 + u.
real[3][2]*im10 +
1837 u.
imag[3][3]*re11 + u.
real[3][3]*im11;
1843 return *(
int*)a - *(
int*)b;
1853 long long int numTargAmps = 1 << u.
numQubits;
1858 long long int thisTask;
1859 long long int thisInd00;
1860 long long int thisGlobalInd00;
1863 qreal reElem, imElem;
1867 long long int ampInds[numTargAmps];
1868 qreal reAmps[numTargAmps];
1869 qreal imAmps[numTargAmps];
1873 int sortedTargs[numTargs];
1874 for (
int t=0; t < numTargs; t++)
1875 sortedTargs[t] = targs[t];
1876 qsort(sortedTargs, numTargs,
sizeof(
int),
qsortComp);
1879 # pragma omp parallel \
1881 shared (reVec,imVec, numTasks,numTargAmps,globalIndStart, ctrlMask,targs,sortedTargs,u,numTargs) \
1882 private (thisTask,thisInd00,thisGlobalInd00,ind,i,t,r,c,reElem,imElem, ampInds,reAmps,imAmps)
1886 # pragma omp for schedule (static)
1888 for (thisTask=0; thisTask<numTasks; thisTask++) {
1891 thisInd00 = thisTask;
1892 for (t=0; t < numTargs; t++)
1896 thisGlobalInd00 = thisInd00 + globalIndStart;
1897 if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1901 for (i=0; i < numTargAmps; i++) {
1905 for (t=0; t < numTargs; t++)
1911 reAmps [i] = reVec[ind];
1912 imAmps [i] = imVec[ind];
1916 for (r=0; r < numTargAmps; r++) {
1921 for (c=0; c < numTargAmps; c++) {
1922 reElem = u.
real[r][c];
1923 imElem = u.
imag[r][c];
1924 reVec[ind] += reAmps[c]*reElem - imAmps[c]*imElem;
1925 imVec[ind] += reAmps[c]*imElem + imAmps[c]*reElem;
1934 long long int sizeBlock, sizeHalfBlock;
1935 long long int thisBlock,
1938 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
1939 long long int thisTask;
1943 sizeHalfBlock = 1LL << targetQubit;
1944 sizeBlock = 2LL * sizeHalfBlock;
1951 # pragma omp parallel \
1953 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks) \
1954 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
1958 # pragma omp for schedule (static)
1960 for (thisTask=0; thisTask<numTasks; thisTask++) {
1962 thisBlock = thisTask / sizeHalfBlock;
1963 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
1964 indexLo = indexUp + sizeHalfBlock;
1967 stateRealUp = stateVecReal[indexUp];
1968 stateImagUp = stateVecImag[indexUp];
1970 stateRealLo = stateVecReal[indexLo];
1971 stateImagLo = stateVecImag[indexLo];
1975 stateVecReal[indexUp] = u.
real[0][0]*stateRealUp - u.
imag[0][0]*stateImagUp
1976 + u.
real[0][1]*stateRealLo - u.
imag[0][1]*stateImagLo;
1977 stateVecImag[indexUp] = u.
real[0][0]*stateImagUp + u.
imag[0][0]*stateRealUp
1978 + u.
real[0][1]*stateImagLo + u.
imag[0][1]*stateRealLo;
1981 stateVecReal[indexLo] = u.
real[1][0]*stateRealUp - u.
imag[1][0]*stateImagUp
1982 + u.
real[1][1]*stateRealLo - u.
imag[1][1]*stateImagLo;
1983 stateVecImag[indexLo] = u.
real[1][0]*stateImagUp + u.
imag[1][0]*stateRealUp
1984 + u.
real[1][1]*stateImagLo + u.
imag[1][1]*stateRealLo;
2003 ComplexArray stateVecUp,
2004 ComplexArray stateVecLo,
2005 ComplexArray stateVecOut)
2008 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2009 long long int thisTask;
2014 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2015 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2016 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2019 # pragma omp parallel \
2021 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2022 rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks) \
2023 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2027 # pragma omp for schedule (static)
2029 for (thisTask=0; thisTask<numTasks; thisTask++) {
2031 stateRealUp = stateVecRealUp[thisTask];
2032 stateImagUp = stateVecImagUp[thisTask];
2034 stateRealLo = stateVecRealLo[thisTask];
2035 stateImagLo = stateVecImagLo[thisTask];
2038 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2039 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2058 ComplexArray stateVecUp,
2059 ComplexArray stateVecLo,
2060 ComplexArray stateVecOut)
2063 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2064 long long int thisTask;
2069 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2070 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2071 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2075 # pragma omp parallel \
2077 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2078 rot1Real, rot1Imag, rot2Real, rot2Imag,numTasks) \
2079 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2083 # pragma omp for schedule (static)
2085 for (thisTask=0; thisTask<numTasks; thisTask++) {
2087 stateRealUp = stateVecRealUp[thisTask];
2088 stateImagUp = stateVecImagUp[thisTask];
2090 stateRealLo = stateVecRealLo[thisTask];
2091 stateImagLo = stateVecImagLo[thisTask];
2093 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2094 + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2095 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2096 + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2104 long long int sizeBlock, sizeHalfBlock;
2105 long long int thisBlock,
2108 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2109 long long int thisTask;
2112 long long int chunkId=qureg.
chunkId;
2117 sizeHalfBlock = 1LL << targetQubit;
2118 sizeBlock = 2LL * sizeHalfBlock;
2127 # pragma omp parallel \
2129 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, \
2130 numTasks,chunkId,chunkSize,controlQubit) \
2131 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2135 # pragma omp for schedule (static)
2137 for (thisTask=0; thisTask<numTasks; thisTask++) {
2139 thisBlock = thisTask / sizeHalfBlock;
2140 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2141 indexLo = indexUp + sizeHalfBlock;
2143 controlBit =
extractBit (controlQubit, indexUp+chunkId*chunkSize);
2146 stateRealUp = stateVecReal[indexUp];
2147 stateImagUp = stateVecImag[indexUp];
2149 stateRealLo = stateVecReal[indexLo];
2150 stateImagLo = stateVecImag[indexLo];
2153 stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
2154 - betaReal*stateRealLo - betaImag*stateImagLo;
2155 stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
2156 - betaReal*stateImagLo + betaImag*stateRealLo;
2159 stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
2160 + alphaReal*stateRealLo + alphaImag*stateImagLo;
2161 stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
2162 + alphaReal*stateImagLo - alphaImag*stateRealLo;
// Listing fragment: the function name, remaining parameters and braces are not present here.
// Visible behaviour: for each amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit) whose
// global index satisfies the control-mask test, applies the 2x2 complex matrix u in place:
//   up' = u[0][0]*up + u[0][1]*lo
//   lo' = u[1][0]*up + u[1][1]*lo
2174 Qureg qureg,
int targetQubit,
2175 long long int ctrlQubitsMask,
long long int ctrlFlipMask,
2178 long long int sizeBlock, sizeHalfBlock;
2179 long long int thisBlock,
2182 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2183 long long int thisTask;
2186 long long int chunkId=qureg.
chunkId;
2189 sizeHalfBlock = 1LL << targetQubit;
2190 sizeBlock = 2LL * sizeHalfBlock;
2197 # pragma omp parallel \
2199 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u, ctrlQubitsMask,ctrlFlipMask, \
2200 numTasks,chunkId,chunkSize) \
2201 private (thisTask,thisBlock, indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2205 # pragma omp for schedule (static)
2207 for (thisTask=0; thisTask<numTasks; thisTask++) {
2209 thisBlock = thisTask / sizeHalfBlock;
2210 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2211 indexLo = indexUp + sizeHalfBlock;
// Proceed only when every bit selected by ctrlQubitsMask is set in
// (global index XOR ctrlFlipMask); the XOR inverts the bits named in ctrlFlipMask first.
2216 if (ctrlQubitsMask == (ctrlQubitsMask & ((indexUp+chunkId*chunkSize) ^ ctrlFlipMask))) {
// Cache the old pair so both outputs are computed from pre-update values.
2218 stateRealUp = stateVecReal[indexUp];
2219 stateImagUp = stateVecImag[indexUp];
2221 stateRealLo = stateVecReal[indexLo];
2222 stateImagLo = stateVecImag[indexLo];
2225 stateVecReal[indexUp] = u.
real[0][0]*stateRealUp - u.
imag[0][0]*stateImagUp
2226 + u.
real[0][1]*stateRealLo - u.
imag[0][1]*stateImagLo;
2227 stateVecImag[indexUp] = u.
real[0][0]*stateImagUp + u.
imag[0][0]*stateRealUp
2228 + u.
real[0][1]*stateImagLo + u.
imag[0][1]*stateRealLo;
2231 stateVecReal[indexLo] = u.
real[1][0]*stateRealUp - u.
imag[1][0]*stateImagUp
2232 + u.
real[1][1]*stateRealLo - u.
imag[1][1]*stateImagLo;
2233 stateVecImag[indexLo] = u.
real[1][0]*stateImagUp + u.
imag[1][0]*stateRealUp
2234 + u.
real[1][1]*stateImagLo + u.
imag[1][1]*stateRealLo;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit),
// applies the 2x2 complex matrix u in place:
//   up' = u[0][0]*up + u[0][1]*lo
//   lo' = u[1][0]*up + u[1][1]*lo
2244 long long int sizeBlock, sizeHalfBlock;
2245 long long int thisBlock,
2248 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2249 long long int thisTask;
2252 long long int chunkId=qureg.
chunkId;
2257 sizeHalfBlock = 1LL << targetQubit;
2258 sizeBlock = 2LL * sizeHalfBlock;
2265 # pragma omp parallel \
2267 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks,chunkId,chunkSize,controlQubit) \
2268 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2272 # pragma omp for schedule (static)
2274 for (thisTask=0; thisTask<numTasks; thisTask++) {
2276 thisBlock = thisTask / sizeHalfBlock;
2277 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2278 indexLo = indexUp + sizeHalfBlock;
// The control bit is read from the global index (local index plus chunkId*chunkSize).
// NOTE(review): the statement that tests controlBit is not present in this listing.
2280 controlBit =
extractBit (controlQubit, indexUp+chunkId*chunkSize);
// Cache the old pair so both outputs are computed from pre-update values.
2283 stateRealUp = stateVecReal[indexUp];
2284 stateImagUp = stateVecImag[indexUp];
2286 stateRealLo = stateVecReal[indexLo];
2287 stateImagLo = stateVecImag[indexLo];
2291 stateVecReal[indexUp] = u.
real[0][0]*stateRealUp - u.
imag[0][0]*stateImagUp
2292 + u.
real[0][1]*stateRealLo - u.
imag[0][1]*stateImagLo;
2293 stateVecImag[indexUp] = u.
real[0][0]*stateImagUp + u.
imag[0][0]*stateRealUp
2294 + u.
real[0][1]*stateImagLo + u.
imag[0][1]*stateRealLo;
2297 stateVecReal[indexLo] = u.
real[1][0]*stateRealUp - u.
imag[1][0]*stateImagUp
2298 + u.
real[1][1]*stateRealLo - u.
imag[1][1]*stateImagLo;
2299 stateVecImag[indexLo] = u.
real[1][0]*stateImagUp + u.
imag[1][0]*stateRealUp
2300 + u.
real[1][1]*stateImagLo + u.
imag[1][1]*stateRealLo;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: element-wise over three separate arrays (no in-place pairing):
//   out[k] = rot1*up[k] + conj(rot2)*lo[k]
// with rot1 = rot1Real + i*rot1Imag and rot2 = rot2Real + i*rot2Imag.
2321 ComplexArray stateVecUp,
2322 ComplexArray stateVecLo,
2323 ComplexArray stateVecOut)
2326 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2327 long long int thisTask;
2330 long long int chunkId=qureg.
chunkId;
2336 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2337 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2338 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2341 # pragma omp parallel \
2343 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2344 rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks,chunkId,chunkSize,controlQubit) \
2345 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2349 # pragma omp for schedule (static)
2351 for (thisTask=0; thisTask<numTasks; thisTask++) {
// The control bit is read from the global index (thisTask plus chunkId*chunkSize).
// NOTE(review): the statement that tests controlBit is not present in this listing.
2352 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2355 stateRealUp = stateVecRealUp[thisTask];
2356 stateImagUp = stateVecImagUp[thisTask];
2358 stateRealLo = stateVecRealLo[thisTask];
2359 stateImagLo = stateVecImagLo[thisTask];
// The signs on the rot2Imag terms are those of multiplying by the conjugate of rot2.
2362 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2363 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: element-wise over three separate arrays:
//   out[k] = rot1*up[k] + rot2*lo[k]
// with rot1 = rot1Real + i*rot1Imag and rot2 = rot2Real + i*rot2Imag (no conjugation).
2383 ComplexArray stateVecUp,
2384 ComplexArray stateVecLo,
2385 ComplexArray stateVecOut)
2388 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2389 long long int thisTask;
2392 long long int chunkId=qureg.
chunkId;
2398 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2399 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2400 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2403 # pragma omp parallel \
2405 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2406 rot1Real,rot1Imag, rot2Real,rot2Imag, numTasks,chunkId,chunkSize,controlQubit) \
2407 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2411 # pragma omp for schedule (static)
2413 for (thisTask=0; thisTask<numTasks; thisTask++) {
// The control bit is read from the global index (thisTask plus chunkId*chunkSize).
// NOTE(review): the statement that tests controlBit is not present in this listing.
2414 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2417 stateRealUp = stateVecRealUp[thisTask];
2418 stateImagUp = stateVecImagUp[thisTask];
2420 stateRealLo = stateVecRealLo[thisTask];
2421 stateImagLo = stateVecImagLo[thisTask];
2423 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2424 + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2425 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2426 + rot2Real*stateImagLo + rot2Imag*stateRealLo;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: for every element whose global index passes the control-mask test,
// writes out[k] = rot1*up[k] + rot2*lo[k] (complex arithmetic); other elements of the
// output array are not written by the visible code.
2450 long long int ctrlQubitsMask,
long long int ctrlFlipMask,
2452 ComplexArray stateVecUp,
2453 ComplexArray stateVecLo,
2454 ComplexArray stateVecOut)
2457 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2458 long long int thisTask;
2461 long long int chunkId=qureg.
chunkId;
2465 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2466 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2467 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2470 # pragma omp parallel \
2472 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2473 rot1Real,rot1Imag, rot2Real,rot2Imag, ctrlQubitsMask,ctrlFlipMask, numTasks,chunkId,chunkSize) \
2474 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2478 # pragma omp for schedule (static)
2480 for (thisTask=0; thisTask<numTasks; thisTask++) {
// Proceed only when every bit selected by ctrlQubitsMask is set in
// (global index XOR ctrlFlipMask).
2481 if (ctrlQubitsMask == (ctrlQubitsMask & ((thisTask+chunkId*chunkSize) ^ ctrlFlipMask))) {
2483 stateRealUp = stateVecRealUp[thisTask];
2484 stateImagUp = stateVecImagUp[thisTask];
2486 stateRealLo = stateVecRealLo[thisTask];
2487 stateImagLo = stateVecImagLo[thisTask];
2489 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2490 + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2491 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2492 + rot2Real*stateImagLo + rot2Imag*stateRealLo;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for every amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit),
// exchanges the two complex amplitudes in place.
2500 long long int sizeBlock, sizeHalfBlock;
2501 long long int thisBlock,
2504 qreal stateRealUp,stateImagUp;
2505 long long int thisTask;
2509 sizeHalfBlock = 1LL << targetQubit;
2510 sizeBlock = 2LL * sizeHalfBlock;
2517 # pragma omp parallel \
2519 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks) \
2520 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2524 # pragma omp for schedule (static)
2526 for (thisTask=0; thisTask<numTasks; thisTask++) {
2527 thisBlock = thisTask / sizeHalfBlock;
2528 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2529 indexLo = indexUp + sizeHalfBlock;
// Three-step swap: save upper, copy lower into upper, store saved upper into lower.
2531 stateRealUp = stateVecReal[indexUp];
2532 stateImagUp = stateVecImag[indexUp];
2534 stateVecReal[indexUp] = stateVecReal[indexLo];
2535 stateVecImag[indexUp] = stateVecImag[indexLo];
2537 stateVecReal[indexLo] = stateRealUp;
2538 stateVecImag[indexLo] = stateImagUp;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: copies the real and imaginary parts of stateVecIn into stateVecOut,
// element by element, for indices 0..numTasks-1.
2557 ComplexArray stateVecIn,
2558 ComplexArray stateVecOut)
2561 long long int thisTask;
2564 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2565 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2568 # pragma omp parallel \
2570 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut,numTasks) \
2575 # pragma omp for schedule (static)
2577 for (thisTask=0; thisTask<numTasks; thisTask++) {
2578 stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2579 stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit),
// reads a control bit from the global index and exchanges the two amplitudes in place.
// NOTE(review): the statement that tests controlBit is not present in this listing, so
// whether the swap is conditional cannot be confirmed from here.
2586 long long int sizeBlock, sizeHalfBlock;
2587 long long int thisBlock,
2590 qreal stateRealUp,stateImagUp;
2591 long long int thisTask;
2594 long long int chunkId=qureg.
chunkId;
2599 sizeHalfBlock = 1LL << targetQubit;
2600 sizeBlock = 2LL * sizeHalfBlock;
2607 # pragma omp parallel \
2609 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,numTasks,chunkId,chunkSize,controlQubit) \
2610 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
2614 # pragma omp for schedule (static)
2616 for (thisTask=0; thisTask<numTasks; thisTask++) {
2617 thisBlock = thisTask / sizeHalfBlock;
2618 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2619 indexLo = indexUp + sizeHalfBlock;
2621 controlBit =
extractBit(controlQubit, indexUp+chunkId*chunkSize);
// Three-step swap: save upper, copy lower into upper, store saved upper into lower.
2623 stateRealUp = stateVecReal[indexUp];
2624 stateImagUp = stateVecImag[indexUp];
2626 stateVecReal[indexUp] = stateVecReal[indexLo];
2627 stateVecImag[indexUp] = stateVecImag[indexLo];
2629 stateVecReal[indexLo] = stateRealUp;
2630 stateVecImag[indexLo] = stateImagUp;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: reads a control bit from each element's global index and copies
// stateVecIn[k] into stateVecOut[k].
// NOTE(review): the statement that tests controlBit is not present in this listing, so
// whether the copy is conditional cannot be confirmed from here.
2647 ComplexArray stateVecIn,
2648 ComplexArray stateVecOut)
2651 long long int thisTask;
2654 long long int chunkId=qureg.
chunkId;
2658 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2659 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2662 # pragma omp parallel \
2664 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2665 numTasks,chunkId,chunkSize,controlQubit) \
2666 private (thisTask,controlBit)
2670 # pragma omp for schedule (static)
2672 for (thisTask=0; thisTask<numTasks; thisTask++) {
2673 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2675 stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2676 stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for every amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit),
// updates in place:
//   up' = conjFac * (-i * lo)
//   lo' = conjFac * ( i * up)
// conjFac is a shared scalar; its definition is not visible in this fragment.
2684 long long int sizeBlock, sizeHalfBlock;
2685 long long int thisBlock,
2688 qreal stateRealUp,stateImagUp;
2689 long long int thisTask;
2693 sizeHalfBlock = 1LL << targetQubit;
2694 sizeBlock = 2LL * sizeHalfBlock;
2701 # pragma omp parallel \
2703 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,conjFac) \
2704 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2708 # pragma omp for schedule (static)
2710 for (thisTask=0; thisTask<numTasks; thisTask++) {
2711 thisBlock = thisTask / sizeHalfBlock;
2712 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2713 indexLo = indexUp + sizeHalfBlock;
// Save the upper amplitude: it is overwritten before the lower one is computed from it.
2715 stateRealUp = stateVecReal[indexUp];
2716 stateImagUp = stateVecImag[indexUp];
2718 stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
2719 stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
2720 stateVecReal[indexLo] = conjFac * -stateImagUp;
2721 stateVecImag[indexLo] = conjFac * stateRealUp;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: element-wise over separate input and output arrays,
//   outRe[k] = conjFac * realSign * inIm[k]
//   outIm[k] = conjFac * imagSign * inRe[k]
// Both signs start at +1 and imagSign becomes -1 when updateUpper is non-zero.
// NOTE(review): no visible line changes realSign; any else-branch is not present here.
2740 ComplexArray stateVecIn,
2741 ComplexArray stateVecOut,
2742 int updateUpper,
int conjFac)
2745 long long int thisTask;
2748 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2749 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2751 int realSign=1, imagSign=1;
2752 if (updateUpper) imagSign=-1;
2756 # pragma omp parallel \
2758 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2759 realSign,imagSign, numTasks,conjFac) \
2764 # pragma omp for schedule (static)
2766 for (thisTask=0; thisTask<numTasks; thisTask++) {
2767 stateVecRealOut[thisTask] = conjFac * realSign * stateVecImagIn[thisTask];
2768 stateVecImagOut[thisTask] = conjFac * imagSign * stateVecRealIn[thisTask];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit),
// reads a control bit from the global index and updates in place:
//   up' = conjFac * (-i * lo)
//   lo' = conjFac * ( i * up)
// NOTE(review): the statement that tests controlBit is not present in this listing.
2778 long long int sizeBlock, sizeHalfBlock;
2779 long long int thisBlock,
2782 qreal stateRealUp,stateImagUp;
2783 long long int thisTask;
2786 long long int chunkId=qureg.
chunkId;
2791 sizeHalfBlock = 1LL << targetQubit;
2792 sizeBlock = 2LL * sizeHalfBlock;
2799 # pragma omp parallel \
2801 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,chunkId, \
2802 chunkSize,controlQubit,conjFac) \
2803 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
2807 # pragma omp for schedule (static)
2809 for (thisTask=0; thisTask<numTasks; thisTask++) {
2810 thisBlock = thisTask / sizeHalfBlock;
2811 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2812 indexLo = indexUp + sizeHalfBlock;
2814 controlBit =
extractBit(controlQubit, indexUp+chunkId*chunkSize);
// Save the upper amplitude: it is overwritten before the lower one is computed from it.
2816 stateRealUp = stateVecReal[indexUp];
2817 stateImagUp = stateVecImag[indexUp];
2820 stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
2821 stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
2822 stateVecReal[indexLo] = conjFac * -stateImagUp;
2823 stateVecImag[indexLo] = conjFac * stateRealUp;
// Listing fragment: the function name, leading parameters and braces are not present here.
// Visible behaviour: reads a control bit from each element's global index and writes
//   out[k] = conjFac * (-i * in[k])
// i.e. outRe = conjFac*inIm and outIm = conjFac*(-inRe).
// NOTE(review): the statement that tests controlBit is not present in this listing.
2831 ComplexArray stateVecIn,
2832 ComplexArray stateVecOut,
int conjFac)
2835 long long int thisTask;
2838 long long int chunkId=qureg.
chunkId;
2842 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2843 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2846 # pragma omp parallel \
2848 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2849 numTasks,chunkId,chunkSize,controlQubit,conjFac) \
2850 private (thisTask,controlBit)
2854 # pragma omp for schedule (static)
2856 for (thisTask=0; thisTask<numTasks; thisTask++) {
2857 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2859 stateVecRealOut[thisTask] = conjFac * stateVecImagIn[thisTask];
2860 stateVecImagOut[thisTask] = conjFac * -stateVecRealIn[thisTask];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for every amplitude pair (indexUp, indexLo = indexUp + 2^targetQubit),
// updates in place:
//   up' = (up + lo) / sqrt(2)
//   lo' = (up - lo) / sqrt(2)
2874 long long int sizeBlock, sizeHalfBlock;
2875 long long int thisBlock,
2878 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2879 long long int thisTask;
2883 sizeHalfBlock = 1LL << targetQubit;
2884 sizeBlock = 2LL * sizeHalfBlock;
// Reciprocal of sqrt(2), computed once outside the parallel loop.
2890 qreal recRoot2 = 1.0/sqrt(2);
2893 # pragma omp parallel \
2895 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, recRoot2, numTasks) \
2896 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2900 # pragma omp for schedule (static)
2902 for (thisTask=0; thisTask<numTasks; thisTask++) {
2903 thisBlock = thisTask / sizeHalfBlock;
2904 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2905 indexLo = indexUp + sizeHalfBlock;
// Cache the old pair so both outputs are computed from pre-update values.
2907 stateRealUp = stateVecReal[indexUp];
2908 stateImagUp = stateVecImag[indexUp];
2910 stateRealLo = stateVecReal[indexLo];
2911 stateImagLo = stateVecImag[indexLo];
2913 stateVecReal[indexUp] = recRoot2*(stateRealUp + stateRealLo);
2914 stateVecImag[indexUp] = recRoot2*(stateImagUp + stateImagLo);
2916 stateVecReal[indexLo] = recRoot2*(stateRealUp - stateRealLo);
2917 stateVecImag[indexLo] = recRoot2*(stateImagUp - stateImagLo);
// Listing fragment: the function name, remaining parameters and braces are not present here.
// Visible behaviour: element-wise over three separate arrays,
//   out[k] = (up[k] + sign*lo[k]) / sqrt(2)
// sign is set to 1 when updateUpper is non-zero.
// NOTE(review): the declaration of sign and its value otherwise are not present in this listing.
2933 ComplexArray stateVecUp,
2934 ComplexArray stateVecLo,
2935 ComplexArray stateVecOut,
2939 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2940 long long int thisTask;
2944 if (updateUpper) sign=1;
2947 qreal recRoot2 = 1.0/sqrt(2);
2949 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2950 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2951 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2954 # pragma omp parallel \
2956 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2957 recRoot2, sign, numTasks) \
2958 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2962 # pragma omp for schedule (static)
2964 for (thisTask=0; thisTask<numTasks; thisTask++) {
2966 stateRealUp = stateVecRealUp[thisTask];
2967 stateImagUp = stateVecImagUp[thisTask];
2969 stateRealLo = stateVecRealLo[thisTask];
2970 stateImagLo = stateVecImagLo[thisTask];
2972 stateVecRealOut[thisTask] = recRoot2*(stateRealUp + sign*stateRealLo);
2973 stateVecImagOut[thisTask] = recRoot2*(stateImagUp + sign*stateImagLo);
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: reads the target qubit's bit from each amplitude's global index and
// multiplies the amplitude in place by (cosAngle + i*sinAngle).
// NOTE(review): the definitions of cosAngle/sinAngle and the statement that tests targetBit
// are not present in this listing.
2980 long long int index;
2981 long long int stateVecSize;
2985 long long int chunkId=qureg.
chunkId;
2992 qreal stateRealLo, stateImagLo;
2997 # pragma omp parallel for \
2999 shared (stateVecSize, stateVecReal,stateVecImag, cosAngle,sinAngle, \
3000 chunkId,chunkSize,targetQubit) \
3001 private (index,targetBit,stateRealLo,stateImagLo) \
3004 for (index=0; index<stateVecSize; index++) {
3007 targetBit =
extractBit (targetQubit, index+chunkId*chunkSize);
// Cache the old amplitude: both output components depend on both old components.
3010 stateRealLo = stateVecReal[index];
3011 stateImagLo = stateVecImag[index];
3013 stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3014 stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: reads the bits of idQubit1 and idQubit2 from each amplitude's global
// index and multiplies the amplitude in place by (cos(angle) + i*sin(angle)).
// NOTE(review): the statement that tests bit1/bit2 is not present in this listing.
3021 long long int index;
3022 long long int stateVecSize;
3026 long long int chunkId=qureg.
chunkId;
3033 qreal stateRealLo, stateImagLo;
// Trigonometric factors are evaluated once, outside the parallel loop.
3034 qreal cosAngle = cos(angle);
3035 qreal sinAngle = sin(angle);
3038 # pragma omp parallel for \
3040 shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize, \
3041 idQubit1,idQubit2,cosAngle,sinAngle ) \
3042 private (index,bit1,bit2,stateRealLo,stateImagLo) \
3045 for (index=0; index<stateVecSize; index++) {
3046 bit1 =
extractBit (idQubit1, index+chunkId*chunkSize);
3047 bit2 =
extractBit (idQubit2, index+chunkId*chunkSize);
3050 stateRealLo = stateVecReal[index];
3051 stateImagLo = stateVecImag[index];
3053 stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3054 stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: builds a bit mask from controlQubits, then multiplies in place by
// (cos(angle) + i*sin(angle)) every amplitude whose global index has all mask bits set.
3061 long long int index;
3062 long long int stateVecSize;
3065 long long int chunkId=qureg.
chunkId;
3067 long long int mask =
getQubitBitMask(controlQubits, numControlQubits);
3073 qreal stateRealLo, stateImagLo;
3074 qreal cosAngle = cos(angle);
3075 qreal sinAngle = sin(angle);
3078 # pragma omp parallel \
3080 shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3081 private (index, stateRealLo, stateImagLo)
3085 # pragma omp for schedule (static)
3087 for (index=0; index<stateVecSize; index++) {
// True only when every bit of mask is set in the global index.
3088 if (mask == (mask & (index+chunkId*chunkSize)) ){
3090 stateRealLo = stateVecReal[index];
3091 stateImagLo = stateVecImag[index];
3093 stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3094 stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
// Listing fragment: isolated statement; its enclosing function is not present here.
// Clears the lowest set bit of mask.
3104 mask = mask & (mask-1);
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: multiplies every amplitude in place by
// (cos(angle/2) - i*fac*sin(angle/2)).
// NOTE(review): the statement that assigns fac (and any use of mask, which appears in the
// shared clause) is not present in this listing.
3111 long long int index;
3112 long long int stateVecSize;
3115 long long int chunkId=qureg.
chunkId;
3121 qreal stateReal, stateImag;
// Half-angle factors are evaluated once, outside the parallel loop.
3122 qreal cosAngle = cos(angle/2.0);
3123 qreal sinAngle = sin(angle/2.0);
3130 # pragma omp parallel \
3132 shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3133 private (index, fac, stateReal, stateImag)
3137 # pragma omp for schedule (static)
3139 for (index=0; index<stateVecSize; index++) {
3140 stateReal = stateVecReal[index];
3141 stateImag = stateVecImag[index];
3145 stateVecReal[index] = cosAngle*stateReal + fac * sinAngle*stateImag;
3146 stateVecImag[index] = - fac * sinAngle*stateReal + cosAngle*stateImag;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: walks the elements spaced diagSpacing = 1 + densityDim apart that fall
// inside this chunk and sums the real part of those whose basis-state index has
// measureQubit's bit equal to 0, accumulating into zeroProb with an OpenMP reduction.
3156 long long int diagSpacing = 1LL + densityDim;
3157 long long int maxNumDiagsPerChunk = 1 + localNumAmps / diagSpacing;
// Number of such elements located in earlier chunks (0 for the first chunk).
3158 long long int numPrevDiags = (qureg.
chunkId>0)? 1+(qureg.
chunkId*localNumAmps)/diagSpacing : 0;
3159 long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3160 long long int localIndNextDiag = globalIndNextDiag % localNumAmps;
// Start from the maximum count and drop one if the last candidate would lie past this chunk.
3163 long long int numDiagsInThisChunk = maxNumDiagsPerChunk;
3164 if (localIndNextDiag + (numDiagsInThisChunk-1)*diagSpacing >= localNumAmps)
3165 numDiagsInThisChunk -= 1;
3167 long long int visitedDiags;
3168 long long int basisStateInd;
3169 long long int index;
3175 # pragma omp parallel \
3176 shared (localIndNextDiag, numPrevDiags, diagSpacing, stateVecReal, numDiagsInThisChunk) \
3177 private (visitedDiags, basisStateInd, index) \
3178 reduction ( +:zeroProb )
3182 # pragma omp for schedule (static)
3185 for (visitedDiags = 0; visitedDiags < numDiagsInThisChunk; visitedDiags++) {
// basisStateInd counts diagonal elements globally; index is the local array position.
3187 basisStateInd = numPrevDiags + visitedDiags;
3188 index = localIndNextDiag + diagSpacing * visitedDiags;
3190 if (
extractBit(measureQubit, basisStateInd) == 0)
3191 zeroProb += stateVecReal[index];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: sums |amplitude|^2 over the first half of every block of size
// 2^(measureQubit+1), using an OpenMP reduction, and returns the sum.
3210 long long int sizeBlock,
3213 long long int thisBlock,
3216 qreal totalProbability;
3218 long long int thisTask;
3224 sizeHalfBlock = 1LL << (measureQubit);
3226 sizeBlock = 2LL * sizeHalfBlock;
3229 totalProbability = 0.0;
3235 # pragma omp parallel \
3236 shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag) \
3237 private (thisTask,thisBlock,index) \
3238 reduction ( +:totalProbability )
3242 # pragma omp for schedule (static)
3244 for (thisTask=0; thisTask<numTasks; thisTask++) {
// Map the task number to an index in the first half of its block.
3245 thisBlock = thisTask / sizeHalfBlock;
3246 index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3248 totalProbability += stateVecReal[index]*stateVecReal[index]
3249 + stateVecImag[index]*stateVecImag[index];
3252 return totalProbability;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: sums |amplitude|^2 over indices 0..numTasks-1 with an OpenMP
// reduction and returns the sum.
3264 qreal totalProbability;
3266 long long int thisTask;
3274 totalProbability = 0.0;
3280 # pragma omp parallel \
3281 shared (numTasks,stateVecReal,stateVecImag) \
3282 private (thisTask) \
3283 reduction ( +:totalProbability )
3287 # pragma omp for schedule (static)
3289 for (thisTask=0; thisTask<numTasks; thisTask++) {
3290 totalProbability += stateVecReal[thisTask]*stateVecReal[thisTask]
3291 + stateVecImag[thisTask]*stateVecImag[thisTask];
3295 return totalProbability;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: reads the bits of idQubit1 and idQubit2 from each amplitude's global
// index and negates the amplitude (both real and imaginary parts) in place.
// NOTE(review): the statement that tests bit1/bit2 is not present in this listing.
3302 long long int index;
3303 long long int stateVecSize;
3307 long long int chunkId=qureg.
chunkId;
3315 # pragma omp parallel for \
3317 shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize,idQubit1,idQubit2 ) \
3318 private (index,bit1,bit2) \
3321 for (index=0; index<stateVecSize; index++) {
3322 bit1 =
extractBit (idQubit1, index+chunkId*chunkSize);
3323 bit2 =
extractBit (idQubit2, index+chunkId*chunkSize);
3325 stateVecReal [index] = - stateVecReal [index];
3326 stateVecImag [index] = - stateVecImag [index];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: builds a bit mask from controlQubits, then negates in place every
// amplitude whose global index has all mask bits set.
3333 long long int index;
3334 long long int stateVecSize;
3337 long long int chunkId=qureg.
chunkId;
3339 long long int mask =
getQubitBitMask(controlQubits, numControlQubits);
3346 # pragma omp parallel \
3348 shared (stateVecSize, stateVecReal,stateVecImag, mask, chunkId,chunkSize ) \
3353 # pragma omp for schedule (static)
3355 for (index=0; index<stateVecSize; index++) {
// True only when every bit of mask is set in the global index.
3356 if (mask == (mask & (index+chunkId*chunkSize)) ){
3357 stateVecReal [index] = - stateVecReal [index];
3358 stateVecImag [index] = - stateVecImag [index];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: two alternative loops over amplitude pairs (index, index + 2^measureQubit).
// The first scales the lower-index amplitude by renorm = 1/sqrt(totalProbability) and zeroes
// its partner; the second zeroes the lower-index amplitude and scales the partner.
// NOTE(review): outcome is listed in the shared clause, but the branch that chooses between
// the two loops is not present in this listing.
3383 long long int sizeBlock,
3386 long long int thisBlock,
3391 long long int thisTask;
3398 sizeHalfBlock = 1LL << (measureQubit);
3400 sizeBlock = 2LL * sizeHalfBlock;
3402 renorm=1/sqrt(totalProbability);
3408 # pragma omp parallel \
3410 shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,renorm,outcome) \
3411 private (thisTask,thisBlock,index)
3417 # pragma omp for schedule (static)
3419 for (thisTask=0; thisTask<numTasks; thisTask++) {
// Keep (and renormalise) the first-half amplitude, discard the second-half one.
3420 thisBlock = thisTask / sizeHalfBlock;
3421 index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3422 stateVecReal[index]=stateVecReal[index]*renorm;
3423 stateVecImag[index]=stateVecImag[index]*renorm;
3425 stateVecReal[index+sizeHalfBlock]=0;
3426 stateVecImag[index+sizeHalfBlock]=0;
3431 # pragma omp for schedule (static)
3433 for (thisTask=0; thisTask<numTasks; thisTask++) {
// Discard the first-half amplitude, keep (and renormalise) the second-half one.
3434 thisBlock = thisTask / sizeHalfBlock;
3435 index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3436 stateVecReal[index]=0;
3437 stateVecImag[index]=0;
3439 stateVecReal[index+sizeHalfBlock]=stateVecReal[index+sizeHalfBlock]*renorm;
3440 stateVecImag[index+sizeHalfBlock]=stateVecImag[index+sizeHalfBlock]*renorm;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: scales every amplitude in 0..numTasks-1 by
// renorm = 1/sqrt(totalProbability).
3465 long long int thisTask;
3468 qreal renorm=1/sqrt(totalProbability);
3474 # pragma omp parallel \
3475 shared (numTasks,stateVecReal,stateVecImag) \
3480 # pragma omp for schedule (static)
3482 for (thisTask=0; thisTask<numTasks; thisTask++) {
3483 stateVecReal[thisTask] = stateVecReal[thisTask]*renorm;
3484 stateVecImag[thisTask] = stateVecImag[thisTask]*renorm;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: sets the real and imaginary parts of every amplitude in
// 0..numTasks-1 to zero.
3504 long long int thisTask;
3515 # pragma omp parallel \
3516 shared (numTasks,stateVecReal,stateVecImag) \
3521 # pragma omp for schedule (static)
3523 for (thisTask=0; thisTask<numTasks; thisTask++) {
3524 stateVecReal[thisTask] = 0;
3525 stateVecImag[thisTask] = 0;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each task, exchanges the complex amplitudes stored at ind01 and
// ind10 in reVec/imVec.
// NOTE(review): the statements that compute ind00, ind01 and ind10 from thisTask, qb1 and
// qb2 are not present in this listing.
3543 long long int thisTask;
3544 long long int ind00, ind01, ind10;
3549 # pragma omp parallel \
3551 shared (reVec,imVec,numTasks,qb1,qb2) \
3552 private (thisTask, ind00,ind01,ind10, re01,re10, im01,im10)
3556 # pragma omp for schedule (static)
3558 for (thisTask=0; thisTask<numTasks; thisTask++) {
// Read both amplitudes before writing either, then store them crossed over.
3565 re01 = reVec[ind01]; im01 = imVec[ind01];
3566 re10 = reVec[ind10]; im10 = imVec[ind10];
3569 reVec[ind01] = re10; reVec[ind10] = re01;
3570 imVec[ind01] = im10; imVec[ind10] = im01;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each local amplitude, converts its local index to a global index,
// converts a pair global index back to an offset within the pair buffer, and overwrites the
// local amplitude with the pair-buffer value at that offset.
// NOTE(review): the statement that computes pairGlobalInd (and any condition guarding the
// copy) is not present in this listing.
3588 long long int globalStartInd = qureg.
chunkId * numLocalAmps;
3589 long long int pairGlobalStartInd = pairRank * numLocalAmps;
3591 long long int localInd, globalInd;
3592 long long int pairLocalInd, pairGlobalInd;
3595 # pragma omp parallel \
3597 shared (reVec,imVec,rePairVec,imPairVec,numLocalAmps,globalStartInd,pairGlobalStartInd,qb1,qb2) \
3598 private (localInd,globalInd, pairLocalInd,pairGlobalInd)
3602 # pragma omp for schedule (static)
3604 for (localInd=0; localInd < numLocalAmps; localInd++) {
3606 globalInd = globalStartInd + localInd;
3610 pairLocalInd = pairGlobalInd - pairGlobalStartInd;
3612 reVec[localInd] = rePairVec[pairLocalInd];
3613 imVec[localInd] = imPairVec[pairLocalInd];
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: element-wise complex weighted sum written back into the output vector:
//   out[k] = facOut*out[k] + fac1*vec1[k] + fac2*vec2[k]
// where each fac is the complex number (facRe + i*facIm).
3637 qreal re1,im1, re2,im2, reOut,imOut;
3638 long long int index;
3641 # pragma omp parallel \
3642 shared (vecRe1,vecIm1, vecRe2,vecIm2, vecReOut,vecImOut, facRe1,facIm1,facRe2,facIm2, numAmps) \
3643 private (index, re1,im1, re2,im2, reOut,imOut)
3647 # pragma omp for schedule (static)
3649 for (index=0LL; index<numAmps; index++) {
// Read the old output value first because it is also an input to the sum.
3650 re1 = vecRe1[index]; im1 = vecIm1[index];
3651 re2 = vecRe2[index]; im2 = vecIm2[index];
3652 reOut = vecReOut[index];
3653 imOut = vecImOut[index];
3655 vecReOut[index] = (facReOut*reOut - facImOut*imOut) + (facRe1*re1 - facIm1*im1) + (facRe2*re2 - facIm2*im2);
3656 vecImOut[index] = (facReOut*imOut + facImOut*reOut) + (facRe1*im1 + facIm1*re1) + (facRe2*im2 + facIm2*re2);
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each index, stores the complex product (a + i*b)*(c + i*d) into
// stateRe/stateIm.
// NOTE(review): the statements that load a, b, c and d are not present in this listing;
// the shared clause suggests they come from stateRe/stateIm and opRe/opIm - confirm.
3672 long long int index;
3675 # pragma omp parallel \
3676 shared (stateRe,stateIm, opRe,opIm, numAmps) \
3677 private (index, a,b,c,d)
3681 # pragma omp for schedule (static)
3683 for (index=0LL; index<numAmps; index++) {
3690 stateRe[index] = a*c - b*d;
3691 stateIm[index] = a*d + b*c;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: for each index, loads the operator element at (index % opDim) into
// (c, d) and stores the complex product (a + i*b)*(c + i*d) into stateRe/stateIm.
// NOTE(review): the statements that load a and b are not present in this listing.
3714 long long int index;
3717 # pragma omp parallel \
3718 shared (stateRe,stateIm, opRe,opIm, numAmps,opDim) \
3719 private (index, a,b,c,d)
3723 # pragma omp for schedule (static)
3725 for (index=0LL; index<numAmps; index++) {
// The operator arrays are indexed cyclically with period opDim.
3728 c = opRe[index % opDim];
3729 d = opIm[index % opDim];
3732 stateRe[index] = a*c - b*d;
3733 stateIm[index] = a*d + b*c;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: accumulates sum_k |state[k]|^2 * op[k] (real and imaginary parts
// separately, via an OpenMP reduction) and stores the result in innerProd.
3743 long long int index;
3750 qreal vecRe,vecIm,vecAbs, opRe, opIm;
3753 # pragma omp parallel \
3754 shared (stateReal, stateImag, opReal, opImag, numAmps) \
3755 private (index, vecRe,vecIm,vecAbs, opRe,opIm) \
3756 reduction ( +:expecRe, expecIm )
3760 # pragma omp for schedule (static)
3762 for (index=0; index < numAmps; index++) {
3763 vecRe = stateReal[index];
3764 vecIm = stateImag[index];
3765 opRe = opReal[index];
3766 opIm = opImag[index];
// vecAbs holds the squared magnitude of the amplitude (no square root is taken).
3769 vecAbs = vecRe*vecRe + vecIm*vecIm;
3770 expecRe += vecAbs*opRe;
3771 expecIm += vecAbs*opIm;
3776 innerProd.
real = expecRe;
3777 innerProd.
imag = expecIm;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: starting at localIndNextDiag and stepping by diagSpacing, accumulates
// the complex product of each visited state element with an operator element (via an
// OpenMP reduction) and stores the total in expecVal.
// NOTE(review): the definitions of diagSpacing, numPrevDiags and numAmps are not present
// in this listing.
3793 long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3794 long long int localIndNextDiag = globalIndNextDiag % qureg.
numAmpsPerChunk;
3805 long long int stateInd;
3806 long long int opInd;
3807 qreal matRe, matIm, opRe, opIm;
3812 # pragma omp parallel \
3813 shared (stateReal,stateImag, opReal,opImag, localIndNextDiag,diagSpacing,numAmps) \
3814 private (stateInd,opInd, matRe,matIm, opRe,opIm) \
3815 reduction ( +:expecRe, expecIm )
3819 # pragma omp for schedule (static)
3821 for (stateInd=localIndNextDiag; stateInd < numAmps; stateInd += diagSpacing) {
3823 matRe = stateReal[stateInd];
3824 matIm = stateImag[stateInd];
// opInd counts visited elements from 0, so the operator arrays are read consecutively.
3825 opInd = (stateInd - localIndNextDiag) / diagSpacing;
3826 opRe = opReal[opInd];
3827 opIm = opImag[opInd];
3831 expecRe += matRe * opRe - matIm * opIm;
3832 expecIm += matRe * opIm + matIm * opRe;
3837 expecVal.
real = expecRe;
3838 expecVal.
imag = expecIm;
// Listing fragment: the enclosing function's signature and braces are not present here.
// Visible behaviour: copies real[index + offset] and imag[index + offset] into
// vecRe[index] and vecIm[index] for index in [localStartInd, localEndInd).
// NOTE(review): the definitions of localStartInd, numElems and offset, and the statement
// guarded by the `localStartInd < 0` test, are not present in this listing.
3847 long long int localEndInd = localStartInd + numElems;
3853 if (localStartInd < 0)
3860 long long int index;
3865 # pragma omp parallel \
3867 shared (localStartInd,localEndInd, vecRe,vecIm, real,imag, offset) \
3872 # pragma omp for schedule (static)
3875 for (index=localStartInd; index < localEndInd; index++) {
3876 vecRe[index] = real[index + offset];
3877 vecIm[index] = imag[index + offset];