3-phase PV router
Loading...
Searching...
No Matches
FastDivision.cpp
Go to the documentation of this file.
1#include "FastDivision.h"
2
3unsigned int divu10(unsigned int n)
4{
5 unsigned long working;
6 asm volatile(
7 "ldi %A1, %3 \n\t"
8 "ldi %C1, 0x00 \n\t"
9 "ldi %D1, 0x00 \n\t"
10
11 "lsr %B0 \n\t" // we have divided by 2, now divide by 5.
12 "ror %A0 \n\t"
13
14 "subi %A0, 0xFF \n\t" //n++
15 "sbci %B0, 0xFF \n\t"
16
17 "mul %A0, %A1 \n\t" //A * X
18 "mov %B1, r1 \n\t"
19
20 "add %B1, r0 \n\t" //A* X'
21 "adc %C1, r1 \n\t"
22
23 "mul %B0, %A1 \n\t"
24 "add %B1, r0 \n\t" //A* X'
25 "adc %C1, r1 \n\t"
26 "adc %D1, %D1 \n\t" //D1 is known 0, we need to grab the carry
27
28 "add %C1, r0 \n\t" //A* X'
29 "adc %D1, r1 \n\t"
30
31 "movw %A0, %C1 \n\t" // >> 16
32
33 "eor r1, r1 \n\t"
34
35 : "=r"(n), "=r"(working)
36 : "0"(n), "M"(0x33)
37 : "r1", "r0");
38 return n;
39}
40
41unsigned int divu7(unsigned int n)
42{
43 unsigned long working;
44 asm volatile(
45 "ldi %A1, %3 \n\t"
46 "ldi %B1, 0xFF \n\t"
47 "ldi %D1, 0x00 \n\t"
48 "cpi %A0, 0xFE \n\t"
49 "cpc %B0, %B1 \n\t"
50 "brlo 1f \n\t"
51 "ldi %C1, 0x01 \n\t" //final answer++
52 "rjmp 2f \n\t"
53 "1: \n\t"
54 "subi %A0, 0xFF \n\t" //n++
55 "sbci %B0, 0xFF \n\t"
56 "ldi %C1, 0x00 \n\t"
57 "2: \n\t"
58
59 "mul %A0, %A1 \n\t" //A * X
60 "mov %B1, r1 \n\t"
61
62 "mul %B0, %A1 \n\t"
63 "add %B1, r0 \n\t" //B * X
64 "adc %C1, r1 \n\t"
65
66 "ldi %A1, %4 \n\t"
67 "mul %A0, %A1 \n\t" //A * X'
68 "add %B1, r0 \n\t"
69 "adc %C1, r1 \n\t"
70 "adc %D1, %D1 \n\t" //D1 is known 0, we need to grab the carry
71
72 "mul %B0, %A1 \n\t"
73 "add %C1, r0 \n\t" //B* X'
74 "adc %D1, r1 \n\t"
75
76 "movw %A0, %C1 \n\t" // >> 16
77
78 "eor r1, r1 \n\t"
79
80 "lsr %B0 \n\t" // we have divided by 1.75, now divide by 4.
81 "ror %A0 \n\t"
82 "lsr %B0 \n\t"
83 "ror %A0 \n\t"
84
85 : "=r"(n), "=r"(working)
86 : "0"(n), "M"(0x49), "M"(0x92)
87 : "r1", "r0");
88 return n;
89}
90
91unsigned int divu9(unsigned int n)
92{
93 unsigned long working;
94 asm volatile(
95 "ldi %A1, %3 \n\t"
96 "ldi %B1, 0xFF \n\t"
97 "ldi %D1, 0x00 \n\t"
98 "cpi %A0, 0xFF \n\t"
99 "cpc %B0, %B1 \n\t"
100 "brlo 1f \n\t"
101 "ldi %C1, 0x01 \n\t" //final answer++
102 "rjmp 2f \n\t"
103 "1: \n\t"
104 "subi %A0, 0xFF \n\t" //n++
105 "sbci %B0, 0xFF \n\t"
106 "ldi %C1, 0x00 \n\t"
107 "2: \n\t"
108
109 "mul %A0, %A1 \n\t" //A * X
110 "mov %B1, r1 \n\t"
111
112 "mul %B0, %A1 \n\t"
113 "add %B1, r0 \n\t" //B * X
114 "adc %C1, r1 \n\t"
115
116 "ldi %A1, %4 \n\t"
117 "mul %A0, %A1 \n\t" //A * X'
118 "add %B1, r0 \n\t"
119 "adc %C1, r1 \n\t"
120 "adc %D1, %D1 \n\t" //D1 is known 0, we need to grab the carry
121
122 "mul %B0, %A1 \n\t"
123 "add %C1, r0 \n\t" //B* X'
124 "adc %D1, r1 \n\t"
125
126 "movw %A0, %C1 \n\t" // >> 16
127
128 "eor r1, r1 \n\t"
129
130 "lsr %B0 \n\t" // we have divided by 2.25, now divide by 4.
131 "ror %A0 \n\t"
132 "lsr %B0 \n\t"
133 "ror %A0 \n\t"
134
135 : "=r"(n), "=r"(working)
136 : "0"(n), "M"(0xC7), "M"(0x71)
137 : "r1", "r0");
138 return n;
139}
140
141unsigned int divu11(unsigned int n)
142{
143 unsigned long working;
144 asm volatile(
145 "ldi %A1, %3 \n\t"
146 "ldi %B1, 0xFF \n\t"
147 "ldi %D1, 0x00 \n\t"
148 "cpi %A0, 0xFF \n\t"
149 "cpc %B0, %B1 \n\t"
150 "brlo 1f \n\t"
151 "ldi %C1, 0x01 \n\t" //final answer++
152 "rjmp 2f \n\t"
153 "1: \n\t"
154 "subi %A0, 0xFF \n\t" //n++
155 "sbci %B0, 0xFF \n\t"
156 "ldi %C1, 0x00 \n\t"
157 "2: \n\t"
158
159 "mul %A0, %A1 \n\t" //A * X
160 "mov %B1, r1 \n\t"
161
162 "mul %B0, %A1 \n\t"
163 "add %B1, r0 \n\t" //B * X
164 "adc %C1, r1 \n\t"
165
166 "ldi %A1, %4 \n\t"
167 "mul %A0, %A1 \n\t" //A * X'
168 "add %B1, r0 \n\t"
169 "adc %C1, r1 \n\t"
170 "adc %D1, %D1 \n\t" //D1 is known 0, we need to grab the carry
171
172 "mul %B0, %A1 \n\t"
173 "add %C1, r0 \n\t" //B* X'
174 "adc %D1, r1 \n\t"
175
176 "movw %A0, %C1 \n\t" // >> 16
177
178 "eor r1, r1 \n\t"
179
180 "lsr %B0 \n\t" // we have divided by 2.75, now divide by 4.
181 "ror %A0 \n\t"
182 "lsr %B0 \n\t"
183 "ror %A0 \n\t"
184
185 : "=r"(n), "=r"(working)
186 : "0"(n), "M"(0x17), "M"(0x5D)
187 : "r1", "r0");
188 return n;
189}
190
191unsigned int divu14(unsigned int n)
192{
193 unsigned long working;
194 asm volatile(
195 "ldi %A1, %3 \n\t"
196 "ldi %B1, 0xFF \n\t"
197 "ldi %D1, 0x00 \n\t"
198
199 "lsr %B0 \n\t"
200 "ror %A0 \n\t" // we have divided by 2, now divide by 7.
201
202 "cpi %A0, 0xFE \n\t"
203 "cpc %B0, %B1 \n\t"
204 "brlo 1f \n\t"
205 "ldi %C1, 0x01 \n\t" //final answer++
206 "rjmp 2f \n\t"
207 "1: \n\t"
208 "subi %A0, 0xFF \n\t" //n++
209 "sbci %B0, 0xFF \n\t"
210 "ldi %C1, 0x00 \n\t"
211 "2: \n\t"
212
213 "mul %A0, %A1 \n\t" //A * X
214 "mov %B1, r1 \n\t"
215
216 "mul %B0, %A1 \n\t"
217 "add %B1, r0 \n\t" //B * X
218 "adc %C1, r1 \n\t"
219
220 "ldi %A1, %4 \n\t"
221 "mul %A0, %A1 \n\t" //A * X'
222 "add %B1, r0 \n\t"
223 "adc %C1, r1 \n\t"
224 "adc %D1, %D1 \n\t" //D1 is known 0, we need to grab the carry
225
226 "mul %B0, %A1 \n\t"
227 "add %C1, r0 \n\t" //B* X'
228 "adc %D1, r1 \n\t"
229
230 "movw %A0, %C1 \n\t" // >> 16
231
232 "eor r1, r1 \n\t"
233
234 "lsr %B0 \n\t" // we have divided by 7, now divide by 2.
235 "ror %A0 \n\t"
236
237 : "=r"(n), "=r"(working)
238 : "0"(n), "M"(0x24), "M"(0x49)
239 : "r1", "r0");
240 return n;
241}
242
/**
 * Front end of a chain of shared assembly stages for division by 24.
 *
 * Shifts n right by one bit three times (n / 8), copies r25:r24 into r19:r18
 * and jumps to the divu3helper label that is defined inside divu3().
 * The global labels divu12helper and divu6helper mark the points after the
 * first and second shift; their users are not visible in this file section.
 *
 * NOTE(review): n is declared as an input-only operand but is shifted in
 * place, and control leaves the block through rjmp, so the trailing
 * `return n` is presumably never reached - the result is expected to come
 * from the code that is jumped to. Confirm against the generated code.
 * NOTE(review): "movw r18, r24" presumes n lives in r25:r24 here and that the
 * helper's operand is r19:r18 - this depends on register allocation; verify.
 */
243unsigned int divu24(unsigned int n)
244{
245 //unsigned long working;
246 asm volatile(
247 "lsr %B0 \n\t" // we have divided by 2, now divide by 12.
248 "ror %A0 \n\t"
249 ".global divu12helper \n\t"
250 "divu12helper: \n\t"
251 "lsr %B0 \n\t" // we have divided by 2, now divide by 6.
252 "ror %A0 \n\t"
253 ".global divu6helper \n\t"
254 "divu6helper: \n\t"
255 "lsr %B0 \n\t" // we have divided by 2, now divide by 3.
256 "ror %A0 \n\t"
257
258 "movw r18, r24 \n\t" // copy r25:r24 into r19:r18 before entering the helper
259 "rjmp divu3helper \n\t" // continue at the n++ stage inside divu3()
260 :
261 : "r"(n)
262 : "r1", "r0", "r18", "r19");
263 return n;
264}
/**
 * Prologue for division by 3; the multiplication itself is done by the shared
 * divuhelper label defined inside divu15().
 *
 * Compares n with 0xFFFF. If n is lower, n is incremented and r26 is cleared;
 * otherwise n is left unchanged and r26 is set to 1 ("final answer++").
 * Then the multiplier byte 0x55 is loaded into r24 and control jumps to
 * divuhelper. The divu3helper label is also the jump target of divu24().
 *
 * NOTE(review): the asm has no output operand although it modifies n, and it
 * leaves through rjmp, so the trailing `return n` is presumably never reached.
 * NOTE(review): relies on hard-coded registers r24..r27 matching what
 * divuhelper expects - verify against the generated code.
 */
265unsigned int divu3(unsigned int n)
266{
267 //unsigned long working;
268 asm volatile(
269 "ldi r25, 0xFF \n\t" // high byte of the comparison constant 0xFFFF
270 "cpi %A0, 0xFF \n\t"
271 "cpc %B0, r25 \n\t"
272 "brlo divu3helper \n\t" // n < 0xFFFF: safe to increment
273 "ldi r26, 0x01 \n\t" //final answer++
274 "rjmp 1f \n\t"
275 "divu3helper: \n\t"
276 "subi %A0, 0xFF \n\t" //n++
277 "sbci %B0, 0xFF \n\t"
278 "ldi r26, 0x00 \n\t"
279 "1: \n\t"
280 "ldi r24, %1 \n\t" // multiplier byte 0x55
281 "rjmp divuhelper \n\t" // shared multiply stage inside divu15()
282
283 :
284 : "r"(n), "M"(0x55)
285 : "r1", "r0", "r24", "r25", "r26", "r27");
286 return n;
287}
288
/**
 * Front end of a chain of shared assembly stages for division by 20.
 *
 * Shifts n right by one bit twice (n / 4), copies r25:r24 into r19:r18 and
 * jumps to the divu5helper label that is defined inside divu5().
 * The global label divu10helper marks the point after the first shift; its
 * users are not visible in this file section.
 *
 * NOTE(review): n is declared as an input-only operand but is shifted in
 * place, and control leaves the block through rjmp, so the trailing
 * `return n` is presumably never reached. Confirm against the generated code.
 * NOTE(review): "movw r18, r24" presumes n lives in r25:r24 here and that the
 * helper's operand is r19:r18 - this depends on register allocation; verify.
 */
289unsigned int divu20(unsigned int n)
290{
291 //unsigned long working;
292 asm volatile(
293 "lsr %B0 \n\t" // we have divided by 2, now divide by 10.
294 "ror %A0 \n\t"
295 ".global divu10helper \n\t"
296 "divu10helper: \n\t"
297 "lsr %B0 \n\t" // we have divided by 2, now divide by 5.
298 "ror %A0 \n\t"
299
300 "movw r18, r24 \n\t" // copy r25:r24 into r19:r18 before entering the helper
301 "rjmp divu5helper \n\t" // continue at the n++ stage inside divu5()
302 :
303 : "r"(n)
304 : "r1", "r0", "r18", "r19");
305 return n;
306}
/**
 * Prologue for division by 5; the multiplication itself is done by the shared
 * divuhelper label defined inside divu15().
 *
 * Compares n with 0xFFFF. If n is lower, n is incremented and r26 is cleared;
 * otherwise n is left unchanged and r26 is set to 1 ("final answer++").
 * Then the multiplier byte 0x33 is loaded into r24 and control jumps to
 * divuhelper. The divu5helper label is also the jump target of divu20().
 *
 * NOTE(review): the asm has no output operand although it modifies n, and it
 * leaves through rjmp, so the trailing `return n` is presumably never reached.
 * NOTE(review): relies on hard-coded registers r24..r27 matching what
 * divuhelper expects - verify against the generated code.
 */
307unsigned int divu5(unsigned int n)
308{
309 asm volatile(
310 "ldi r25, 0xFF \n\t" // high byte of the comparison constant 0xFFFF
311 "cpi %A0, 0xFF \n\t"
312 "cpc %B0, r25 \n\t"
313 "brlo divu5helper \n\t" // n < 0xFFFF: safe to increment
314 "ldi r26, 0x01 \n\t" //final answer++
315 "rjmp 1f \n\t"
316 "divu5helper: \n\t"
317 "subi %A0, 0xFF \n\t" //n++
318 "sbci %B0, 0xFF \n\t"
319 "ldi r26, 0x00 \n\t"
320 "1: \n\t"
321 "ldi r24, %1 \n\t" // multiplier byte 0x33
322 "rjmp divuhelper \n\t" // shared multiply stage inside divu15()
323
324 :
325 : "r"(n), "M"(0x33)
326 : "r1", "r0", "r24", "r25", "r26", "r27");
327 return n;
328}
329
/**
 * Front end of a chain of shared assembly stages for division by 60.
 *
 * Shifts n right by one bit twice (n / 4), copies r25:r24 into r19:r18 and
 * jumps to the divu15helper label that is defined inside divu15().
 * The global label divu30helper marks the point after the first shift; its
 * users are not visible in this file section.
 *
 * NOTE(review): n is declared as an input-only operand but is shifted in
 * place, and control leaves the block through rjmp, so the trailing
 * `return n` is presumably never reached. Confirm against the generated code.
 * NOTE(review): "movw r18, r24" presumes n lives in r25:r24 here and that the
 * helper's operand is r19:r18 - this depends on register allocation; verify.
 */
330unsigned int divu60(unsigned int n)
331{
332 asm volatile(
333 "lsr %B0 \n\t" // we have divided by 2, now divide by 30.
334 "ror %A0 \n\t"
335 ".global divu30helper \n\t"
336 "divu30helper: \n\t"
337 "lsr %B0 \n\t" // we have divided by 2, now divide by 15.
338 "ror %A0 \n\t"
339
340 "movw r18, r24 \n\t" // copy r25:r24 into r19:r18 before entering the helper
341 "rjmp divu15helper \n\t" // continue at the n++ stage inside divu15()
342 :
343 : "r"(n)
344 : "r1", "r0", "r18", "r19", "r26", "r27");
345 return n;
346}
/**
 * Division by 15, and host of the shared multiply stage `divuhelper` that the
 * other divuN prologues in this file jump into.
 *
 * Prologue: compares n with 0xFFFF. If n is lower, n is incremented and r26 is
 * cleared; otherwise n is left unchanged and r26 is set to 1. The multiplier
 * byte 0x11 is then loaded into r24. The divu15helper label is also the jump
 * target of divu60().
 *
 * divuhelper: multiplies n by a 16-bit constant whose two bytes both equal
 * r24. Each `mul` result is therefore added at two adjacent byte positions.
 * r25, r26 and r27 collect bytes 1, 2 and 3 of the product (byte 0 is
 * discarded); r27:r26 - the product shifted right by 16 - is copied into n and
 * r1 is cleared again after the multiplications.
 *
 * NOTE(review): code entering at divuhelper from another function relies on
 * %A0/%B0 here naming the same registers as in the jumping function, and on
 * the function epilogue that follows this asm block - verify against the
 * generated code.
 */
347unsigned int divu15(unsigned int n)
348{
349 asm volatile(
350 "ldi r25, 0xFF \n\t" // high byte of the comparison constant 0xFFFF
351 "cpi %A0, 0xFF \n\t"
352 "cpc %B0, r25 \n\t"
353 "brlo divu15helper \n\t" // n < 0xFFFF: safe to increment
354 "ldi r26, 0x01 \n\t" //final answer++
355 "rjmp 1f \n\t"
356 "divu15helper: \n\t"
357 "subi %A0, 0xFF \n\t" //n++
358 "sbci %B0, 0xFF \n\t"
359 "ldi r26, 0x00 \n\t"
360 "1: \n\t"
361 "ldi r24, %2 \n\t" // multiplier byte 0x11
362 "divuhelper: \n\t" //division code for all of: 3,5,6,10,12,15,20,24,30,60. Now that's efficiency!
363 "ldi r27, 0x00 \n\t" // product byte 3 starts at zero
364 "mul %A0, r24 \n\t" //A * X
365 "mov r25, r1 \n\t"
366
367 "add r25, r0 \n\t" //A* X'
368 "adc r26, r1 \n\t"
369
370 "mul %B0, r24 \n\t"
371 "add r25, r0 \n\t" //B * X
372 "adc r26, r1 \n\t"
373 "adc r27, r27 \n\t" //r27 is known 0, we need to grab the carry
374
375 "add r26, r0 \n\t" //B * X'
376 "adc r27, r1 \n\t"
377
378 "movw %A0, r26 \n\t" // >> 16
379
380 "eor r1, r1 \n\t" // restore r1 to zero after mul
381
382 : "=r"(n)
383 : "0"(n), "M"(0x11)
384 : "r1", "r0", "r24", "r25", "r26", "r27");
385 return n;
386}
387
388void divmod10(uint32_t in, uint32_t &div, uint8_t &mod)
389{
390 //assumes that div/mod pointers arrive in r18:r19 and r20:r21 pairs (doesn't matter which way around)
391 //and that in arrives in r22:r25 quad
392 asm volatile(
393 "movw r30, %2 \n\t" //uint32_t* divPtr = ÷
394 "movw r26, %1 \n\t" //uint32_t* modPtr = &mod;
395
396 "mov r0, %A0 \n\t" //byte temp = in
397 "movw r18, %A0 \n\t" //uint32_t q = in;
398 "movw r20, %C0 \n\t"
399 "ori r18, 0x01 \n\t" //q |= 1;
400
401 "lsr r25 \n\t" //x = in >> 2 //note: x reuses registers of 'in', as 'in' was backed up in r0
402 "ror r24 \n\t"
403 "ror r23 \n\t"
404 "ror r22 \n\t"
405 "lsr r25 \n\t"
406 "ror r24 \n\t"
407 "ror r23 \n\t"
408 "ror r22 \n\t"
409
410 "sub r18, r22 \n\t" //q = q - x;
411 "sbc r19, r23 \n\t"
412 "sbc r20, r24 \n\t"
413 "sbc r21, r25 \n\t"
414
415 "movw r22, r18 \n\t" //x = q;
416 "movw r24, r20 \n\t"
417 "lsr r25 \n\t" //x = x >> 4;
418 "ror r24 \n\t"
419 "ror r23 \n\t"
420 "ror r22 \n\t"
421 "lsr r25 \n\t"
422 "ror r24 \n\t"
423 "ror r23 \n\t"
424 "ror r22 \n\t"
425 "lsr r25 \n\t"
426 "ror r24 \n\t"
427 "ror r23 \n\t"
428 "ror r22 \n\t"
429 "lsr r25 \n\t"
430 "ror r24 \n\t"
431 "ror r23 \n\t"
432 "ror r22 \n\t"
433
434 "add r22, r18 \n\t" //x = x + q
435 "adc r23, r19 \n\t"
436 "adc r24, r20 \n\t"
437 "adc r25, r21 \n\t"
438
439 "movw r18, r22 \n\t" //q = x
440 "movw r20, r24 \n\t"
441 "add r18, r23 \n\t" //q = q + (x >> 8)
442 "adc r19, r24 \n\t"
443 "adc r20, r25 \n\t"
444 "adc r21, r1 \n\t"
445
446 "movw r18, r20 \n\t" //q = q >> 16
447 "eor r20, r20 \n\t"
448 "eor r21, r21 \n\t"
449 "add r18, r23 \n\t" //q = q + (x>>8)
450 "adc r19, r24 \n\t"
451 "adc r20, r25 \n\t"
452 "adc r21, r1 \n\t"
453 "add r18, r22 \n\t" //q = q + x
454 "adc r19, r23 \n\t"
455 "adc r20, r24 \n\t"
456 "adc r21, r25 \n\t"
457
458 "mov r18, r19 \n\t" //q = q >> 8
459 "mov r19, r20 \n\t"
460 "mov r20, r21 \n\t"
461 "eor r21, r21 \n\t"
462 "add r18, r22 \n\t" //q = q + x
463 "adc r19, r23 \n\t"
464 "adc r20, r24 \n\t"
465 "adc r21, r25 \n\t"
466
467 "andi r18, 0xF8 \n\t" //q = q & ~0x7
468
469 "sub r0, r18 \n\t" //in = in - q
470
471 "lsr r21 \n\t" //q = q >> 2
472 "ror r20 \n\t"
473 "ror r19 \n\t"
474 "ror r18 \n\t"
475 "lsr r21 \n\t"
476 "ror r20 \n\t"
477 "ror r19 \n\t"
478 "ror r18 \n\t"
479
480 "sub r0, r18 \n\t" //in = in - q
481 "st X, r0 \n\t" //mod = in;
482
483 "lsr r21 \n\t" //q = q >> 1
484 "ror r20 \n\t"
485 "ror r19 \n\t"
486 "ror r18 \n\t"
487
488 "st Z, r18 \n\t" //div = q
489 "std Z+1, r19 \n\t"
490 "std Z+2, r20 \n\t"
491 "std Z+3, r21 \n\t"
492
493 :
494 : "r"(in), "r"(&mod), "r"(&div)
495 : "r0", "r26", "r27", "r31", "r31");
496}
unsigned int divu24(unsigned int n)
unsigned int divu7(unsigned int n)
unsigned int divu10(unsigned int n)
Definition: FastDivision.cpp:3
void divmod10(uint32_t in, uint32_t &div, uint8_t &mod)
unsigned int divu60(unsigned int n)
unsigned int divu14(unsigned int n)
unsigned int divu11(unsigned int n)
unsigned int divu20(unsigned int n)
unsigned int divu5(unsigned int n)
unsigned int divu15(unsigned int n)
unsigned int divu9(unsigned int n)
unsigned int divu3(unsigned int n)